• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9                     This module by Zoltan Herczeg
10      Original API code Copyright (c) 1997-2012 University of Cambridge
11           New API code Copyright (c) 2016-2019 University of Cambridge
12 
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
16 
17     * Redistributions of source code must retain the above copyright notice,
18       this list of conditions and the following disclaimer.
19 
20     * Redistributions in binary form must reproduce the above copyright
21       notice, this list of conditions and the following disclaimer in the
22       documentation and/or other materials provided with the distribution.
23 
24     * Neither the name of the University of Cambridge nor the names of its
25       contributors may be used to endorse or promote products derived from
26       this software without specific prior written permission.
27 
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
40 */
41 
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45 
46 #include "pcre2_internal.h"
47 
48 #ifdef SUPPORT_JIT
49 
50 /* All-in-one: Since we use the JIT compiler only from here,
51 we just include it. This way we don't need to touch the build
52 system files. */
53 
/* Configuration macros for sljit. They are defined here, ahead of the
inclusion of sljit/sljitLir.c later in this file, so that the included
source sees them. */
#define SLJIT_CONFIG_AUTO 1
#define SLJIT_CONFIG_STATIC 1
#define SLJIT_VERBOSE 0

/* SLJIT_DEBUG follows PCRE2_DEBUG: 1 when it is defined, 0 otherwise. */
#ifdef PCRE2_DEBUG
#define SLJIT_DEBUG 1
#else
#define SLJIT_DEBUG 0
#endif

/* Route sljit's allocation macros to the two wrappers defined in this file. */
#define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
#define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
66 
pcre2_jit_malloc(size_t size,void * allocator_data)67 static void * pcre2_jit_malloc(size_t size, void *allocator_data)
68 {
69 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
70 return allocator->malloc(size, allocator->memory_data);
71 }
72 
pcre2_jit_free(void * ptr,void * allocator_data)73 static void pcre2_jit_free(void *ptr, void *allocator_data)
74 {
75 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
76 allocator->free(ptr, allocator->memory_data);
77 }
78 
79 #include "sljit/sljitLir.c"
80 
81 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
82 #error Unsupported architecture
83 #endif
84 
/* Defines for debugging purposes. */

/* 1 - Use unoptimized capturing brackets.
   2 - Enable capture_last_ptr (includes option 1). */
/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */

/* 1 - Always have a control head. */
/* #define DEBUG_FORCE_CONTROL_HEAD 1 */

/* Allocate memory for the regex stack on the real machine stack.
Fast, but limited size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for the stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE 8192

/* Enable to check that the allocation could destroy temporaries.
Only defined when SLJIT_DEBUG is defined and non-zero. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
106 
107 /*
Short summary about the backtracking mechanism employed by the JIT code generator:
109 
The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
115 
116   'ab' - 'a' and 'b' regexps are concatenated
117   'a+' - 'a' is the sub-expression of the '+' operator
118 
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
124 
125  Greedy star operator (*) :
126    Matching path: match happens.
127    Backtrack path: match failed.
128  Non-greedy star operator (*?) :
129    Matching path: no need to perform a match.
130    Backtrack path: match is required.
131 
The following example shows the code generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:
135 
136    A(B|C)D
137 
138 The generated code will be the following:
139 
140  A matching path
141  '(' matching path (pushing arguments to the stack)
142  B matching path
143  ')' matching path (pushing arguments to the stack)
144  D matching path
145  return with successful match
146 
147  D backtrack path
148  ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
149  B backtrack path
150  C expected path
151  jump to D matching path
152  C backtrack path
153  A backtrack path
154 
 Notice that the order of the backtrack code paths is the opposite of the fast
 code paths. In this way the topmost value on the stack always belongs
 to the current backtrack code path. The backtrack path must check
 whether there is a next alternative. If so, it needs to jump back to
 the matching path eventually. Otherwise it needs to clear out its own stack
 frame and continue the execution on the backtrack code paths.
161 */
162 
163 /*
164 Saved stack frames:
165 
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
170 
171 The stack frames are stored in a chain list, and have the following format:
172 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
173 
174 Thus we can restore the private data to a particular point in the stack.
175 */
176 
/* Argument block for functions of type jit_function, which receive a single
pointer to this structure. Pointer-sized members are grouped first and the
integer members follow. */
typedef struct jit_arguments {
  /* Pointers first. */
  struct sljit_stack *stack;
  PCRE2_SPTR str;
  PCRE2_SPTR begin;
  PCRE2_SPTR end;
  pcre2_match_data *match_data;
  PCRE2_SPTR startchar_ptr;
  PCRE2_UCHAR *mark_ptr;
  /* Callout function pointer and the opaque data pointer stored with it. */
  int (*callout)(pcre2_callout_block *, void *);
  void *callout_data;
  /* Everything else after. */
  sljit_uw offset_limit;
  sljit_u32 limit_match;
  sljit_u32 oveccount;
  sljit_u32 options;
} jit_arguments;
194 
/* Number of slots in each per-mode array of executable_functions. */
#define JIT_NUMBER_OF_COMPILE_MODES 3

/* Per-compile-mode record: each of the three arrays has one slot per mode
(a function pointer, a read-only data list head and a size). The two trailing
members are single values, not per-mode. */
typedef struct executable_functions {
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
  void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_u32 top_bracket;
  sljit_u32 limit_match;
} executable_functions;
204 
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
209 
/* Node of a singly linked list of stubs; each node pairs a jump (start)
with a label (quit). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
215 
/* Negative sentinel values. Presumably stored in the framesize members of
the backtrack structures, whose comments say "less than 0 if not needed" -
confirm against the users of those members. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
220 
/* Tags distinguishing the two kinds of control entries (mark and then-trap). */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
225 
/* Kinds of early-fail handling (skip, fail, fail with a range). */
enum  early_fail_types {
  type_skip = 0,
  type_fail = 1,
  type_fail_range = 2
};
231 
/* Pointer to a function that takes a jit_arguments block and returns int,
using the SLJIT_FUNC calling convention. */
typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
233 
/* The following structure is the key data type for the recursive
code generator. It is allocated by compile_matchingpath, and contains
the arguments for compile_backtrackingpath. Must be the first member
of its descendants, so that a pointer to any descendant can also be
used as a pointer to a backtrack_common. */
typedef struct backtrack_common {
  /* Concatenation stack. */
  struct backtrack_common *prev;
  jump_list *nextbacktracks;
  /* Internal stack (for component operators). */
  struct backtrack_common *top;
  jump_list *topbacktracks;
  /* Opcode pointer. */
  PCRE2_SPTR cc;
} backtrack_common;
248 
/* Backtrack record for assertions. Starts with backtrack_common. */
typedef struct assert_backtrack {
  backtrack_common common;
  /* Jumps collected for the failed-condition case. */
  jump_list *condfailed;
  /* Less than 0 if a frame is not needed. */
  int framesize;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* For iterators. */
  struct sljit_label *matchingpath;
} assert_backtrack;
259 
/* Backtrack record for brackets. Starts with backtrack_common. The union u
holds exactly one of its members at a time; which one applies depends on the
opcode, as noted on each member. */
typedef struct bracket_backtrack {
  backtrack_common common;
  /* Where to continue if an alternative is successfully matched. */
  struct sljit_label *alternative_matchingpath;
  /* For rmin and rmax iterators. */
  struct sljit_label *recursive_matchingpath;
  /* For greedy ? operator. */
  struct sljit_label *zero_matchingpath;
  /* Contains the branches of a failed condition. */
  union {
    /* Both for OP_COND, OP_SCOND. */
    jump_list *condfailed;
    assert_backtrack *assert;
    /* For OP_ONCE. Less than 0 if not needed. */
    int framesize;
    /* For brackets with >3 alternatives. */
    struct sljit_put_label *matching_put_label;
  } u;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
} bracket_backtrack;
281 
/* Backtrack record for the "pos" bracket variants (judging by the name).
Starts with backtrack_common. */
typedef struct bracketpos_backtrack {
  backtrack_common common;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* Reverting stack is needed. */
  int framesize;
  /* Allocated stack size. */
  int stacksize;
} bracketpos_backtrack;
291 
/* Backtrack record carrying a single matching-path label. Starts with
backtrack_common. */
typedef struct braminzero_backtrack {
  backtrack_common common;
  struct sljit_label *matchingpath;
} braminzero_backtrack;
296 
/* Backtrack record for character iterators. Starts with backtrack_common.
The union u holds either a jump list or the charpos record, never both. */
typedef struct char_iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
  union {
    jump_list *backtracks;
    struct {
      unsigned int othercasebit;
      PCRE2_UCHAR chr;
      BOOL enabled;
    } charpos;
  } u;
} char_iterator_backtrack;
310 
/* Backtrack record for reference iterators. Starts with backtrack_common. */
typedef struct ref_iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
} ref_iterator_backtrack;
316 
/* One node per recursion function, kept in a singly linked list. The
entry_calls / backtrack_calls lists collect jumps that have to wait until the
corresponding label exists. */
typedef struct recurse_entry {
  struct recurse_entry *next;
  /* Contains the function entry label. */
  struct sljit_label *entry_label;
  /* Contains the backtrack label of the function.
     NOTE(review): this comment previously repeated the entry-label text;
     reworded from the member name - confirm against the code that sets it. */
  struct sljit_label *backtrack_label;
  /* Collects the entry calls until the function is created. */
  jump_list *entry_calls;
  /* Collects the backtrack calls until the function is created. */
  jump_list *backtrack_calls;
  /* Points to the starting opcode. */
  sljit_sw start;
} recurse_entry;
330 
/* Backtrack record for recursion. Starts with backtrack_common. */
typedef struct recurse_backtrack {
  backtrack_common common;
  /* Return to the matching path. */
  struct sljit_label *matchingpath;
  /* Recursive pattern. */
  recurse_entry *entry;
  /* Pattern is inlined. */
  BOOL inlined_pattern;
} recurse_backtrack;
340 
/* Extra opcode value private to this file; it is set to OP_TABLE_LENGTH.
   Presumably that places it one past the last real opcode so it cannot
   collide with one - confirm against the opcode table. */
#define OP_THEN_TRAP OP_TABLE_LENGTH

/* Backtrack record for a then-trap. Starts with backtrack_common. */
typedef struct then_trap_backtrack {
  backtrack_common common;
  /* If then_trap is not NULL, this structure contains the real
  then_trap for the backtracking path. */
  struct then_trap_backtrack *then_trap;
  /* Points to the starting opcode. */
  sljit_sw start;
  /* Exit point for the then opcodes of this alternative. */
  jump_list *quit;
  /* Frame size of the current alternative. */
  int framesize;
} then_trap_backtrack;
355 
/* Limits for the fast-forward character data. MAX_DIFF_CHARS is the
capacity of the chars array below. MAX_N_CHARS is presumably the number of
positions inspected - confirm against its users. */
#define MAX_N_CHARS 12
#define MAX_DIFF_CHARS 5

/* Set of candidate characters for one position. */
typedef struct fast_forward_char_data {
  /* Number of characters in the chars array, 255 for any character. */
  sljit_u8 count;
  /* Number of last UTF-8 characters in the chars array. */
  sljit_u8 last_count;
  /* Available characters in the current position. */
  PCRE2_UCHAR chars[MAX_DIFF_CHARS];
} fast_forward_char_data;
367 
#define MAX_CLASS_RANGE_SIZE 4
#define MAX_CLASS_CHARS_SIZE 3

/* Shared state of one JIT compilation. The macros in this file that mention
`common` (OVECTOR_START, OVECTOR_PRIV, PRIVATE_DATA, DEFINE_COMPILER) expect a
pointer to this structure to be in scope under that name. */
typedef struct compiler_common {
  /* The sljit generic compiler. */
  struct sljit_compiler *compiler;
  /* Compiled regular expression. */
  pcre2_real_code *re;
  /* First byte code. */
  PCRE2_SPTR start;
  /* Maps private data offset to each opcode. */
  sljit_s32 *private_data_ptrs;
  /* Chain list of read-only data ptrs. */
  void *read_only_data_head;
  /* Tells whether the capturing bracket is optimized. */
  sljit_u8 *optimized_cbracket;
  /* Tells whether the starting offset is a target of then. */
  sljit_u8 *then_offsets;
  /* Current position where a THEN must jump. */
  then_trap_backtrack *then_trap;
  /* Starting offset of private data for capturing brackets. */
  sljit_s32 cbra_ptr;
  /* Output vector starting point. Must be divisible by 2. */
  sljit_s32 ovector_start;
  /* Points to the starting character of the current match. */
  sljit_s32 start_ptr;
  /* Last known position of the requested byte. */
  sljit_s32 req_char_ptr;
  /* Head of the last recursion. */
  sljit_s32 recursive_head_ptr;
  /* First inspected character for partial matching.
     (Needed for avoiding zero length partial matches.) */
  sljit_s32 start_used_ptr;
  /* Starting pointer for partial soft matches. */
  sljit_s32 hit_start;
  /* Pointer of the match end position. */
  sljit_s32 match_end_ptr;
  /* Points to the marked string. */
  sljit_s32 mark_ptr;
  /* Recursive control verb management chain. */
  sljit_s32 control_head_ptr;
  /* Points to the last matched capture block index. */
  sljit_s32 capture_last_ptr;
  /* Fast forward skipping byte code pointer. */
  PCRE2_SPTR fast_forward_bc_ptr;
  /* Locals used by fast fail optimization. */
  sljit_s32 early_fail_start_ptr;
  sljit_s32 early_fail_end_ptr;
  /* Variables used by recursive call generator. */
  sljit_s32 recurse_bitset_size;
  uint8_t *recurse_bitset;

  /* Flipped and lower case tables. */
  const sljit_u8 *fcc;
  sljit_sw lcc;
  /* Mode can be PCRE2_JIT_COMPLETE and others. */
  int mode;
  /* TRUE, when empty match is accepted for partial matching. */
  BOOL allow_empty_partial;
  /* TRUE, when minlength is greater than 0.
     NOTE(review): the member name suggests the opposite sense (TRUE when the
     pattern may match an empty string) - confirm against the code that
     initializes it. */
  BOOL might_be_empty;
  /* \K is found in the pattern. */
  BOOL has_set_som;
  /* (*SKIP:arg) is found in the pattern. */
  BOOL has_skip_arg;
  /* (*THEN) is found in the pattern. */
  BOOL has_then;
  /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
  BOOL has_skip_in_assert_back;
  /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
  BOOL local_quit_available;
  /* Currently in a positive assertion. */
  BOOL in_positive_assertion;
  /* Newline control. */
  int nltype;
  sljit_u32 nlmax;
  sljit_u32 nlmin;
  int newline;
  int bsr_nltype;
  sljit_u32 bsr_nlmax;
  sljit_u32 bsr_nlmin;
  /* Dollar endonly. */
  int endonly;
  /* Tables. */
  sljit_sw ctypes;
  /* Named capturing brackets. */
  PCRE2_SPTR name_table;
  sljit_sw name_count;
  sljit_sw name_entry_size;

  /* Labels and jump lists. */
  struct sljit_label *partialmatchlabel;
  struct sljit_label *quit_label;
  struct sljit_label *abort_label;
  struct sljit_label *accept_label;
  struct sljit_label *ff_newline_shortcut;
  stub_list *stubs;
  recurse_entry *entries;
  recurse_entry *currententry;
  jump_list *partialmatch;
  jump_list *quit;
  jump_list *positive_assertion_quit;
  jump_list *abort;
  jump_list *failed_match;
  jump_list *accept;
  jump_list *calllimit;
  jump_list *stackalloc;
  jump_list *revertframes;
  jump_list *wordboundary;
  jump_list *anynewline;
  jump_list *hspace;
  jump_list *vspace;
  jump_list *casefulcmp;
  jump_list *caselesscmp;
  jump_list *reset_match;
  BOOL unset_backref;
  BOOL alt_circumflex;
  /* Everything below exists only in Unicode-enabled builds. */
#ifdef SUPPORT_UNICODE
  BOOL utf;
  BOOL invalid_utf;
  BOOL ucp;
  /* Points to saving area for iref. */
  sljit_s32 iref_ptr;
  jump_list *getucd;
  jump_list *getucdtype;
  /* Jump lists needed only by the 8-bit library. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  jump_list *utfreadchar;
  jump_list *utfreadtype8;
  jump_list *utfpeakcharback;
#endif
  /* Jump lists for the *_invalid helpers; 8-bit and 16-bit libraries only. */
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
  jump_list *utfreadchar_invalid;
  jump_list *utfreadnewline_invalid;
  jump_list *utfmoveback_invalid;
  jump_list *utfpeakcharback_invalid;
#endif
#endif /* SUPPORT_UNICODE */
} compiler_common;
506 
/* For byte_sequence_compare. */

/* length and sourcereg are always present. The remaining members exist only
when SLJIT_UNALIGNED is defined and non-zero. The unions c and oc have the
same layout: each overlays a 32-bit integer, a 16-bit integer and an array of
code units (4, 2 or 1 elements for code unit widths 8, 16 and 32), so both
occupy four bytes. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
540 
/* Undefine sljit macros. (CMP is given a new definition among the
shortcut macros later in this file.) */
#undef CMP

/* Used for accessing the elements of the stack: byte offset of the
i-th sljit_sw sized slot, as an int. */
#define STACK(i)      ((i) * (int)sizeof(sljit_sw))

/* Only R2 or R3 is accepted as sljit's preferred shift register; any
other value stops the build. */
#ifdef SLJIT_PREF_SHIFT_REG
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
/* Nothing. */
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
#define SHIFT_REG_IS_R3
#else
#error "Unsupported shift register"
#endif
#endif

/* Register assignment. TMP2 and TMP3 swap R2/R3 so that TMP2 is the
preferred shift register whenever sljit declares one. */
#define TMP1          SLJIT_R0
#ifdef SHIFT_REG_IS_R3
#define TMP2          SLJIT_R3
#define TMP3          SLJIT_R2
#else
#define TMP2          SLJIT_R2
#define TMP3          SLJIT_R3
#endif
#define STR_PTR       SLJIT_R1
#define STR_END       SLJIT_S0
#define STACK_TOP     SLJIT_S1
#define STACK_LIMIT   SLJIT_S2
#define COUNT_MATCH   SLJIT_S3
#define ARGUMENTS     SLJIT_S4
#define RETURN_ADDR   SLJIT_R4

/* HAS_VIRTUAL_REGISTERS is 1 only for the 32-bit x86 sljit configuration. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#define HAS_VIRTUAL_REGISTERS 1
#else
#define HAS_VIRTUAL_REGISTERS 0
#endif
578 
/* Local space layout. Each named slot is one sljit_sw wide; the values
are byte offsets. */
/* These two locals can be used by the current opcode. */
#define LOCALS0          (0 * sizeof(sljit_sw))
#define LOCALS1          (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_sw))
#define POSSESSIVE1      (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH      (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet been
reached. */
/* The macros below read members of `common`, so a compiler_common pointer
with that name must be in scope wherever they are used. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data offset recorded for the opcode at cc; the table is indexed
by the distance of cc from common->start. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
596 
/* Code-unit dependent helpers. MOV_UCHAR is the sljit move opcode for one
code unit and IN_UCHARS(x) converts a count of code units to bytes.
UCHAR_SHIFT is log2 of the code unit size in bytes; note that it is left
undefined in the 8-bit case. Any width other than 8, 16 or 32 stops the
build. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define MOV_UCHAR  SLJIT_MOV_U8
#define IN_UCHARS(x) (x)
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define MOV_UCHAR  SLJIT_MOV_U16
#define UCHAR_SHIFT (1)
#define IN_UCHARS(x) ((x) * 2)
#elif PCRE2_CODE_UNIT_WIDTH == 32
#define MOV_UCHAR  SLJIT_MOV_U32
#define UCHAR_SHIFT (2)
#define IN_UCHARS(x) ((x) * 4)
#else
#error Unsupported compiling mode
#endif
611 
/* Shortcuts. Every emitter macro below passes a variable named `compiler`
to sljit; DEFINE_COMPILER declares that variable from common->compiler, so
it must appear in each function that uses the shortcuts. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define OP_SRC(op, src, srcw) \
  sljit_emit_op_src(compiler, (op), (src), (srcw))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emit a jump and bind it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Bind an existing jump to a label emitted at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emit a compare-and-jump and bind it to an existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define CMOV(type, dst_reg, src, srcw) \
  sljit_emit_cmov(compiler, (type), (dst_reg), (src), (srcw))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))

#define READ_CHAR_MAX 0x7fffffff

/* NOTE(review): INVALID_UTF_CHAR expands to an unparenthesized -1; take
care when using it inside larger expressions. */
#define INVALID_UTF_CHAR -1
#define UNASSIGNED_UTF_CHAR 888
646 
647 #if defined SUPPORT_UNICODE
648 #if PCRE2_CODE_UNIT_WIDTH == 8
649 
/* 8-bit version: decode one UTF-8 character forwards from ptr, validating it.

  c               lvalue receiving the code point
  ptr             lvalue pointer to the first byte; moved past the character
  end             continuation bytes are only read at addresses below end
  invalid_action  statement executed when the sequence is not valid UTF-8

ptr[0] is read without a bounds check, so the caller must ensure ptr < end.
A bad lead byte, or a missing or malformed continuation byte, runs
invalid_action with ptr unchanged. Overlong 3- and 4-byte forms, surrogates
and values of 0x110000 or more are only detected after ptr has already been
advanced. On every invalid path c may hold a partially decoded value. The
macro arguments are evaluated several times. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] <= 0x7f) \
    c = *ptr++; \
  else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \
    { \
    c = ptr[1] - 0x80; \
    \
    if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \
      { \
      c |= (ptr[0] - 0xc0) << 6; \
      ptr += 2; \
      } \
    else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \
      { \
      c = c << 6 | (ptr[2] - 0x80); \
      \
      if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \
        { \
        c |= (ptr[0] - 0xe0) << 12; \
        ptr += 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \
        { \
        c = c << 6 | (ptr[3] - 0x80); \
        \
        if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \
          { \
          c |= (ptr[0] - 0xf0) << 18; \
          ptr += 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
711 
/* 8-bit version: decode the UTF-8 character that ends just before ptr,
validating it.

  c               lvalue receiving the code point
  ptr             lvalue pointer just past the character; moved back to its
                  first byte
  start           bytes before start are never read
  invalid_action  statement executed when the sequence is not valid UTF-8

ptr[-1] is read without a bounds check, so the caller must ensure ptr > start.
The bytes are visited from last to first, which means each earlier byte
carries higher-order bits: the final byte supplies bits 0-5, the one before
it bits 6-11, then bits 12-17, and the lead byte the remainder. Each byte is
therefore OR-ed in at its own shift; the value accumulated so far must not be
shifted. A malformed byte structure runs invalid_action with ptr unchanged.
Overlong 3- and 4-byte forms, surrogates and values of 0x110000 or more are
only detected after ptr has already been moved back. On every invalid path c
may hold a partially decoded value. The macro arguments are evaluated several
times. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c <= 0x7f) \
    ptr--; \
  else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \
    { \
    c -= 0x80; \
    \
    if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \
      { \
      c |= (ptr[-2] - 0xc0) << 6; \
      ptr -= 2; \
      } \
    else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \
      { \
      c |= (ptr[-2] - 0x80) << 6; \
      \
      if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \
        { \
        c |= (ptr[-3] - 0xe0) << 12; \
        ptr -= 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \
        { \
        c |= (ptr[-3] - 0x80) << 12; \
        \
        if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \
          { \
          c |= (ptr[-4] - 0xf0) << 18; \
          ptr -= 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
774 
775 #elif PCRE2_CODE_UNIT_WIDTH == 16
776 
/* 16-bit version: decode one UTF-16 character forwards from ptr.

A code unit outside the surrogate range is the character itself. A high
surrogate directly followed (below end) by a low surrogate forms one
character of 0x10000 or above. Anything else runs invalid_action and leaves
both c and ptr untouched. ptr[0] is read without a bounds check. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] >= 0xd800 && ptr[0] < 0xe000) \
    { \
    if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \
      { \
      c = 0x10000 + (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)); \
      ptr += 2; \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    c = *ptr++; \
  }
791 
/* 16-bit version: decode the UTF-16 character that ends just before ptr.

c is always loaded with ptr[-1] first (no bounds check on that read). A
non-surrogate unit is the character itself. A low surrogate preceded, not
before start, by a high surrogate forms one character of 0x10000 or above.
Anything else runs invalid_action with ptr unchanged and c still holding the
raw code unit. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c >= 0xd800 && c < 0xe000) \
    { \
    if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \
      { \
      c = 0x10000 + (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)); \
      ptr -= 2; \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    ptr--; \
  }
807 
808 
809 #elif PCRE2_CODE_UNIT_WIDTH == 32
810 
/* 32-bit version: read one UTF-32 character forwards from ptr.

Surrogates (0xd800-0xdfff) and values of 0x110000 or more run
invalid_action and leave both c and ptr untouched; every other value is
stored in c and ptr moves on by one. The end argument is not used. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if ((ptr[0] >= 0xd800 && ptr[0] < 0xe000) || ptr[0] >= 0x110000) \
    { \
    invalid_action; \
    } \
  else \
    c = *ptr++; \
  }
820 
/* 32-bit version: read the UTF-32 character that ends just before ptr.

c is always loaded with ptr[-1]. Surrogates (0xd800-0xdfff) and values of
0x110000 or more run invalid_action with ptr unchanged; otherwise ptr moves
back by one. The start argument is not used. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if ((ptr[-1] >= 0xd800 && ptr[-1] < 0xe000) || ptr[-1] >= 0x110000) \
    { \
    invalid_action; \
    } \
  else \
    ptr--; \
  }
831 
832 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
833 #endif /* SUPPORT_UNICODE */
834 
/* Returns the address of the opcode that follows a bracketed group.

cc must point to the opening opcode of the group (an assertion, or anything
from OP_ONCE to OP_SCOND). The link stored at offset 1 is followed across the
first branch and every OP_ALT branch until the closing KET-type opcode is
reached; the result points just past that opcode and its link field. */
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

for (;;)
  {
  cc += GET(cc, 1);
  if (*cc != OP_ALT)
    break;
  }

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return cc + 1 + LINK_SIZE;
}
843 
/* Returns the number of alternatives (branches) of a bracketed group.

cc must point to the opening opcode of the group (an assertion, or anything
from OP_ONCE to OP_SCOND). The first branch always counts, and each OP_ALT
reached by following the links adds one more. The walk has to stop on a
KET-type opcode. */
static int no_alternatives(PCRE2_SPTR cc)
{
int branches = 1;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

cc += GET(cc, 1);
while (*cc == OP_ALT)
  {
  cc += GET(cc, 1);
  branches++;
  }

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return branches;
}
857 
/* Functions which might need modification for every newly supported opcode:
859  next_opcode
860  check_opcode_types
861  set_private_data_ptrs
862  get_framesize
863  init_frame
864  get_recurse_data_length
865  copy_recurse_data
866  compile_matchingpath
867  compile_backtrackingpath
868 */
869 
/* Returns a pointer to the opcode that follows the one at cc, or NULL when
the opcode cannot be handled.

  common   compilation state; only read in Unicode builds (the utf flag)
  cc       points to the opcode to step over

NULL is returned for OP_ANYBYTE in UTF mode and for any opcode that is not
listed in the switch (the latter also trips SLJIT_UNREACHABLE). */
static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
/* common is referenced only inside SUPPORT_UNICODE sections. */
SLJIT_UNUSED_ARG(common);
switch(*cc)
  {
  /* Opcodes whose full length is given by the OP_lengths table. */
  case OP_SOD:
  case OP_SOM:
  case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  case OP_ANY:
  case OP_ALLANY:
  case OP_NOTPROP:
  case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE:
  case OP_HSPACE:
  case OP_NOT_VSPACE:
  case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN:
  case OP_EOD:
  case OP_CIRC:
  case OP_CIRCM:
  case OP_DOLL:
  case OP_DOLLM:
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  case OP_CRPOSSTAR:
  case OP_CRPOSPLUS:
  case OP_CRPOSQUERY:
  case OP_CRPOSRANGE:
  case OP_CLASS:
  case OP_NCLASS:
  case OP_REF:
  case OP_REFI:
  case OP_DNREF:
  case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET:
  case OP_KETRMAX:
  case OP_KETRMIN:
  case OP_KETRPOS:
  case OP_REVERSE:
  case OP_ASSERT:
  case OP_ASSERT_NOT:
  case OP_ASSERTBACK:
  case OP_ASSERTBACK_NOT:
  case OP_ASSERT_NA:
  case OP_ASSERTBACK_NA:
  case OP_ONCE:
  case OP_SCRIPT_RUN:
  case OP_BRA:
  case OP_BRAPOS:
  case OP_CBRA:
  case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA:
  case OP_SBRAPOS:
  case OP_SCBRA:
  case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF:
  case OP_DNCREF:
  case OP_RREF:
  case OP_DNRREF:
  case OP_FALSE:
  case OP_TRUE:
  case OP_BRAZERO:
  case OP_BRAMINZERO:
  case OP_BRAPOSZERO:
  case OP_PRUNE:
  case OP_SKIP:
  case OP_THEN:
  case OP_COMMIT:
  case OP_FAIL:
  case OP_ACCEPT:
  case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  return cc + PRIV(OP_lengths)[*cc];

  /* Opcodes that end with a literal character. The table length is applied
  first; in UTF mode, extra code units are then added when the code unit just
  before the new position says the character continues. */
  case OP_CHAR:
  case OP_CHARI:
  case OP_NOT:
  case OP_NOTI:
  case OP_STAR:
  case OP_MINSTAR:
  case OP_PLUS:
  case OP_MINPLUS:
  case OP_QUERY:
  case OP_MINQUERY:
  case OP_UPTO:
  case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSPLUS:
  case OP_POSQUERY:
  case OP_POSUPTO:
  case OP_STARI:
  case OP_MINSTARI:
  case OP_PLUSI:
  case OP_MINPLUSI:
  case OP_QUERYI:
  case OP_MINQUERYI:
  case OP_UPTOI:
  case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI:
  case OP_POSPLUSI:
  case OP_POSQUERYI:
  case OP_POSUPTOI:
  case OP_NOTSTAR:
  case OP_NOTMINSTAR:
  case OP_NOTPLUS:
  case OP_NOTMINPLUS:
  case OP_NOTQUERY:
  case OP_NOTMINQUERY:
  case OP_NOTUPTO:
  case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR:
  case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY:
  case OP_NOTPOSUPTO:
  case OP_NOTSTARI:
  case OP_NOTMINSTARI:
  case OP_NOTPLUSI:
  case OP_NOTMINPLUSI:
  case OP_NOTQUERYI:
  case OP_NOTMINQUERYI:
  case OP_NOTUPTOI:
  case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI:
  case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI:
  case OP_NOTPOSUPTOI:
  cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
  return cc;

  /* Special cases. */
  /* Type repeats stop one code unit short of the table length. Presumably
  this leaves the trailing type opcode to be visited as an opcode of its own
  on the next call - confirm against the callers. */
  case OP_TYPESTAR:
  case OP_TYPEMINSTAR:
  case OP_TYPEPLUS:
  case OP_TYPEMINPLUS:
  case OP_TYPEQUERY:
  case OP_TYPEMINQUERY:
  case OP_TYPEUPTO:
  case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR:
  case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY:
  case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  /* OP_ANYBYTE is rejected (NULL) in UTF mode, otherwise it is one unit long. */
  case OP_ANYBYTE:
#ifdef SUPPORT_UNICODE
  if (common->utf) return NULL;
#endif
  return cc + 1;

  /* The length of a string callout is read from the opcode itself, at
  offset 1 + 2*LINK_SIZE. */
  case OP_CALLOUT_STR:
  return cc + GET(cc, 1 + 2*LINK_SIZE);

  /* The length of an extended class is read from the opcode itself, at
  offset 1. */
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* Verbs with an argument: three fixed code units plus the count held
  in cc[1]. */
  case OP_MARK:
  case OP_COMMIT_ARG:
  case OP_PRUNE_ARG:
  case OP_SKIP_ARG:
  case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  /* Every supported opcode has to be listed above. */
  default:
  SLJIT_UNREACHABLE();
  return NULL;
  }
}
1070 
check_opcode_types(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend)1071 static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
1072 {
1073 int count;
1074 PCRE2_SPTR slot;
1075 PCRE2_SPTR assert_back_end = cc - 1;
1076 PCRE2_SPTR assert_na_end = cc - 1;
1077 
1078 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
1079 while (cc < ccend)
1080   {
1081   switch(*cc)
1082     {
1083     case OP_SET_SOM:
1084     common->has_set_som = TRUE;
1085     common->might_be_empty = TRUE;
1086     cc += 1;
1087     break;
1088 
1089     case OP_REFI:
1090 #ifdef SUPPORT_UNICODE
1091     if (common->iref_ptr == 0)
1092       {
1093       common->iref_ptr = common->ovector_start;
1094       common->ovector_start += 3 * sizeof(sljit_sw);
1095       }
1096 #endif /* SUPPORT_UNICODE */
1097     /* Fall through. */
1098     case OP_REF:
1099     common->optimized_cbracket[GET2(cc, 1)] = 0;
1100     cc += 1 + IMM2_SIZE;
1101     break;
1102 
1103     case OP_ASSERT_NA:
1104     case OP_ASSERTBACK_NA:
1105     slot = bracketend(cc);
1106     if (slot > assert_na_end)
1107       assert_na_end = slot;
1108     cc += 1 + LINK_SIZE;
1109     break;
1110 
1111     case OP_CBRAPOS:
1112     case OP_SCBRAPOS:
1113     common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
1114     cc += 1 + LINK_SIZE + IMM2_SIZE;
1115     break;
1116 
1117     case OP_COND:
1118     case OP_SCOND:
1119     /* Only AUTO_CALLOUT can insert this opcode. We do
1120        not intend to support this case. */
1121     if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
1122       return FALSE;
1123     cc += 1 + LINK_SIZE;
1124     break;
1125 
1126     case OP_CREF:
1127     common->optimized_cbracket[GET2(cc, 1)] = 0;
1128     cc += 1 + IMM2_SIZE;
1129     break;
1130 
1131     case OP_DNREF:
1132     case OP_DNREFI:
1133     case OP_DNCREF:
1134     count = GET2(cc, 1 + IMM2_SIZE);
1135     slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
1136     while (count-- > 0)
1137       {
1138       common->optimized_cbracket[GET2(slot, 0)] = 0;
1139       slot += common->name_entry_size;
1140       }
1141     cc += 1 + 2 * IMM2_SIZE;
1142     break;
1143 
1144     case OP_RECURSE:
1145     /* Set its value only once. */
1146     if (common->recursive_head_ptr == 0)
1147       {
1148       common->recursive_head_ptr = common->ovector_start;
1149       common->ovector_start += sizeof(sljit_sw);
1150       }
1151     cc += 1 + LINK_SIZE;
1152     break;
1153 
1154     case OP_CALLOUT:
1155     case OP_CALLOUT_STR:
1156     if (common->capture_last_ptr == 0)
1157       {
1158       common->capture_last_ptr = common->ovector_start;
1159       common->ovector_start += sizeof(sljit_sw);
1160       }
1161     cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
1162     break;
1163 
1164     case OP_ASSERTBACK:
1165     slot = bracketend(cc);
1166     if (slot > assert_back_end)
1167       assert_back_end = slot;
1168     cc += 1 + LINK_SIZE;
1169     break;
1170 
1171     case OP_THEN_ARG:
1172     common->has_then = TRUE;
1173     common->control_head_ptr = 1;
1174     /* Fall through. */
1175 
1176     case OP_COMMIT_ARG:
1177     case OP_PRUNE_ARG:
1178     if (cc < assert_na_end)
1179       return FALSE;
1180     /* Fall through */
1181     case OP_MARK:
1182     if (common->mark_ptr == 0)
1183       {
1184       common->mark_ptr = common->ovector_start;
1185       common->ovector_start += sizeof(sljit_sw);
1186       }
1187     cc += 1 + 2 + cc[1];
1188     break;
1189 
1190     case OP_THEN:
1191     common->has_then = TRUE;
1192     common->control_head_ptr = 1;
1193     cc += 1;
1194     break;
1195 
1196     case OP_SKIP:
1197     if (cc < assert_back_end)
1198       common->has_skip_in_assert_back = TRUE;
1199     if (cc < assert_na_end)
1200       return FALSE;
1201     cc += 1;
1202     break;
1203 
1204     case OP_SKIP_ARG:
1205     common->control_head_ptr = 1;
1206     common->has_skip_arg = TRUE;
1207     if (cc < assert_back_end)
1208       common->has_skip_in_assert_back = TRUE;
1209     if (cc < assert_na_end)
1210       return FALSE;
1211     cc += 1 + 2 + cc[1];
1212     break;
1213 
1214     case OP_PRUNE:
1215     case OP_COMMIT:
1216     case OP_ASSERT_ACCEPT:
1217     if (cc < assert_na_end)
1218       return FALSE;
1219     cc++;
1220     break;
1221 
1222     default:
1223     cc = next_opcode(common, cc);
1224     if (cc == NULL)
1225       return FALSE;
1226     break;
1227     }
1228   }
1229 return TRUE;
1230 }
1231 
1232 #define EARLY_FAIL_ENHANCE_MAX (1 + 3)
1233 
1234 /*
1235 start:
1236   0 - skip / early fail allowed
1237   1 - only early fail with range allowed
1238   >1 - (start - 1) early fail is processed
1239 
1240 return: current number of iterators enhanced with fast fail
1241 */
detect_early_fail(compiler_common * common,PCRE2_SPTR cc,int * private_data_start,sljit_s32 depth,int start,BOOL fast_forward_allowed)1242 static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start,
1243    sljit_s32 depth, int start, BOOL fast_forward_allowed)
1244 {
1245 PCRE2_SPTR begin = cc;
1246 PCRE2_SPTR next_alt;
1247 PCRE2_SPTR end;
1248 PCRE2_SPTR accelerated_start;
1249 BOOL prev_fast_forward_allowed;
1250 int result = 0;
1251 int count;
1252 
1253 SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
1254 SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
1255 SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);
1256 
1257 next_alt = cc + GET(cc, 1);
1258 if (*next_alt == OP_ALT)
1259   fast_forward_allowed = FALSE;
1260 
1261 do
1262   {
1263   count = start;
1264   cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1265 
1266   while (TRUE)
1267     {
1268     accelerated_start = NULL;
1269 
1270     switch(*cc)
1271       {
1272       case OP_SOD:
1273       case OP_SOM:
1274       case OP_SET_SOM:
1275       case OP_NOT_WORD_BOUNDARY:
1276       case OP_WORD_BOUNDARY:
1277       case OP_EODN:
1278       case OP_EOD:
1279       case OP_CIRC:
1280       case OP_CIRCM:
1281       case OP_DOLL:
1282       case OP_DOLLM:
1283       /* Zero width assertions. */
1284       cc++;
1285       continue;
1286 
1287       case OP_NOT_DIGIT:
1288       case OP_DIGIT:
1289       case OP_NOT_WHITESPACE:
1290       case OP_WHITESPACE:
1291       case OP_NOT_WORDCHAR:
1292       case OP_WORDCHAR:
1293       case OP_ANY:
1294       case OP_ALLANY:
1295       case OP_ANYBYTE:
1296       case OP_NOT_HSPACE:
1297       case OP_HSPACE:
1298       case OP_NOT_VSPACE:
1299       case OP_VSPACE:
1300       fast_forward_allowed = FALSE;
1301       cc++;
1302       continue;
1303 
1304       case OP_ANYNL:
1305       case OP_EXTUNI:
1306       fast_forward_allowed = FALSE;
1307       if (count == 0)
1308         count = 1;
1309       cc++;
1310       continue;
1311 
1312       case OP_NOTPROP:
1313       case OP_PROP:
1314       fast_forward_allowed = FALSE;
1315       cc += 1 + 2;
1316       continue;
1317 
1318       case OP_CHAR:
1319       case OP_CHARI:
1320       case OP_NOT:
1321       case OP_NOTI:
1322       fast_forward_allowed = FALSE;
1323       cc += 2;
1324 #ifdef SUPPORT_UNICODE
1325       if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1326 #endif
1327       continue;
1328 
1329       case OP_TYPESTAR:
1330       case OP_TYPEMINSTAR:
1331       case OP_TYPEPLUS:
1332       case OP_TYPEMINPLUS:
1333       case OP_TYPEPOSSTAR:
1334       case OP_TYPEPOSPLUS:
1335       /* The type or prop opcode is skipped in the next iteration. */
1336       cc += 1;
1337 
1338       if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
1339         {
1340         accelerated_start = cc - 1;
1341         break;
1342         }
1343 
1344       if (count == 0)
1345         count = 1;
1346       fast_forward_allowed = FALSE;
1347       continue;
1348 
1349       case OP_TYPEUPTO:
1350       case OP_TYPEMINUPTO:
1351       case OP_TYPEEXACT:
1352       case OP_TYPEPOSUPTO:
1353       cc += IMM2_SIZE;
1354       /* Fall through */
1355 
1356       case OP_TYPEQUERY:
1357       case OP_TYPEMINQUERY:
1358       case OP_TYPEPOSQUERY:
1359       /* The type or prop opcode is skipped in the next iteration. */
1360       fast_forward_allowed = FALSE;
1361       if (count == 0)
1362         count = 1;
1363       cc += 1;
1364       continue;
1365 
1366       case OP_STAR:
1367       case OP_MINSTAR:
1368       case OP_PLUS:
1369       case OP_MINPLUS:
1370       case OP_POSSTAR:
1371       case OP_POSPLUS:
1372 
1373       case OP_STARI:
1374       case OP_MINSTARI:
1375       case OP_PLUSI:
1376       case OP_MINPLUSI:
1377       case OP_POSSTARI:
1378       case OP_POSPLUSI:
1379 
1380       case OP_NOTSTAR:
1381       case OP_NOTMINSTAR:
1382       case OP_NOTPLUS:
1383       case OP_NOTMINPLUS:
1384       case OP_NOTPOSSTAR:
1385       case OP_NOTPOSPLUS:
1386 
1387       case OP_NOTSTARI:
1388       case OP_NOTMINSTARI:
1389       case OP_NOTPLUSI:
1390       case OP_NOTMINPLUSI:
1391       case OP_NOTPOSSTARI:
1392       case OP_NOTPOSPLUSI:
1393       accelerated_start = cc;
1394       cc += 2;
1395 #ifdef SUPPORT_UNICODE
1396       if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1397 #endif
1398       break;
1399 
1400       case OP_UPTO:
1401       case OP_MINUPTO:
1402       case OP_EXACT:
1403       case OP_POSUPTO:
1404       case OP_UPTOI:
1405       case OP_MINUPTOI:
1406       case OP_EXACTI:
1407       case OP_POSUPTOI:
1408       case OP_NOTUPTO:
1409       case OP_NOTMINUPTO:
1410       case OP_NOTEXACT:
1411       case OP_NOTPOSUPTO:
1412       case OP_NOTUPTOI:
1413       case OP_NOTMINUPTOI:
1414       case OP_NOTEXACTI:
1415       case OP_NOTPOSUPTOI:
1416       cc += IMM2_SIZE;
1417       /* Fall through */
1418 
1419       case OP_QUERY:
1420       case OP_MINQUERY:
1421       case OP_POSQUERY:
1422       case OP_QUERYI:
1423       case OP_MINQUERYI:
1424       case OP_POSQUERYI:
1425       case OP_NOTQUERY:
1426       case OP_NOTMINQUERY:
1427       case OP_NOTPOSQUERY:
1428       case OP_NOTQUERYI:
1429       case OP_NOTMINQUERYI:
1430       case OP_NOTPOSQUERYI:
1431       fast_forward_allowed = FALSE;
1432       if (count == 0)
1433         count = 1;
1434       cc += 2;
1435 #ifdef SUPPORT_UNICODE
1436       if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1437 #endif
1438       continue;
1439 
1440       case OP_CLASS:
1441       case OP_NCLASS:
1442 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
1443       case OP_XCLASS:
1444       accelerated_start = cc;
1445       cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))));
1446 #else
1447       accelerated_start = cc;
1448       cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
1449 #endif
1450 
1451       switch (*cc)
1452         {
1453         case OP_CRSTAR:
1454         case OP_CRMINSTAR:
1455         case OP_CRPLUS:
1456         case OP_CRMINPLUS:
1457         case OP_CRPOSSTAR:
1458         case OP_CRPOSPLUS:
1459         cc++;
1460         break;
1461 
1462         case OP_CRRANGE:
1463         case OP_CRMINRANGE:
1464         case OP_CRPOSRANGE:
1465         cc += 2 * IMM2_SIZE;
1466         /* Fall through */
1467         case OP_CRQUERY:
1468         case OP_CRMINQUERY:
1469         case OP_CRPOSQUERY:
1470         cc++;
1471         if (count == 0)
1472           count = 1;
1473         /* Fall through */
1474         default:
1475         accelerated_start = NULL;
1476         fast_forward_allowed = FALSE;
1477         continue;
1478         }
1479       break;
1480 
1481       case OP_ONCE:
1482       case OP_BRA:
1483       case OP_CBRA:
1484       end = cc + GET(cc, 1);
1485 
1486       prev_fast_forward_allowed = fast_forward_allowed;
1487       fast_forward_allowed = FALSE;
1488       if (depth >= 4)
1489         break;
1490 
1491       end = bracketend(cc) - (1 + LINK_SIZE);
1492       if (*end != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))
1493         break;
1494 
1495       count = detect_early_fail(common, cc, private_data_start, depth + 1, count, prev_fast_forward_allowed);
1496 
1497       if (PRIVATE_DATA(cc) != 0)
1498         common->private_data_ptrs[begin - common->start] = 1;
1499 
1500       if (count < EARLY_FAIL_ENHANCE_MAX)
1501         {
1502         cc = end + (1 + LINK_SIZE);
1503         continue;
1504         }
1505       break;
1506 
1507       case OP_KET:
1508       SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
1509       if (cc >= next_alt)
1510         break;
1511       cc += 1 + LINK_SIZE;
1512       continue;
1513       }
1514 
1515     if (accelerated_start != NULL)
1516       {
1517       if (count == 0)
1518         {
1519         count++;
1520 
1521         if (fast_forward_allowed)
1522           {
1523           common->fast_forward_bc_ptr = accelerated_start;
1524           common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
1525           *private_data_start += sizeof(sljit_sw);
1526           }
1527         else
1528           {
1529           common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;
1530 
1531           if (common->early_fail_start_ptr == 0)
1532             common->early_fail_start_ptr = *private_data_start;
1533 
1534           *private_data_start += sizeof(sljit_sw);
1535           common->early_fail_end_ptr = *private_data_start;
1536 
1537           if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1538             return EARLY_FAIL_ENHANCE_MAX;
1539           }
1540         }
1541       else
1542         {
1543         common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;
1544 
1545         if (common->early_fail_start_ptr == 0)
1546           common->early_fail_start_ptr = *private_data_start;
1547 
1548         *private_data_start += 2 * sizeof(sljit_sw);
1549         common->early_fail_end_ptr = *private_data_start;
1550 
1551         if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1552           return EARLY_FAIL_ENHANCE_MAX;
1553         }
1554 
1555       /* Cannot be part of a repeat. */
1556       common->private_data_ptrs[begin - common->start] = 1;
1557       count++;
1558 
1559       if (count < EARLY_FAIL_ENHANCE_MAX)
1560         continue;
1561       }
1562 
1563     break;
1564     }
1565 
1566   if (*cc != OP_ALT && *cc != OP_KET)
1567     result = EARLY_FAIL_ENHANCE_MAX;
1568   else if (result < count)
1569     result = count;
1570 
1571   cc = next_alt;
1572   next_alt = cc + GET(cc, 1);
1573   }
1574 while (*cc == OP_ALT);
1575 
1576 return result;
1577 }
1578 
/* Returns a small count (0, 1 or 2) for the repeat opcode at 'cc', which is
   expected to be the opcode following a character class. The caller in this
   file, set_private_data_ptrs(), multiplies the result by sizeof(sljit_sw)
   to reserve private data for the class iterator.

     OP_CRSTAR, OP_CRPLUS                      -> 2
     OP_CRMINSTAR, OP_CRMINPLUS,
     OP_CRQUERY, OP_CRMINQUERY                 -> 1
     OP_CRRANGE / OP_CRMINRANGE with max == 0  -> 2 / 1
     OP_CRRANGE / OP_CRMINRANGE otherwise      -> max - min, capped at 2
     any other opcode                          -> 0 */
static int get_class_iterator_size(PCRE2_SPTR cc)
{
sljit_u32 min;
sljit_u32 max;
switch(*cc)
  {
  case OP_CRSTAR:
  case OP_CRPLUS:
  return 2;

  case OP_CRMINSTAR:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  return 1;

  case OP_CRRANGE:
  case OP_CRMINRANGE:
  min = GET2(cc, 1);
  max = GET2(cc, 1 + IMM2_SIZE);
  /* A zero maximum is treated like the star / minstar case. */
  if (max == 0)
    return (*cc == OP_CRRANGE) ? 2 : 1;
  /* Only the span between the bounds matters. The subtraction is unsigned,
     so the result is clamped to 2 afterwards; min == max yields 0. */
  max -= min;
  if (max > 2)
    max = 2;
  return max;

  default:
  return 0;
  }
}
1610 
detect_repeat(compiler_common * common,PCRE2_SPTR begin)1611 static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
1612 {
1613 PCRE2_SPTR end = bracketend(begin);
1614 PCRE2_SPTR next;
1615 PCRE2_SPTR next_end;
1616 PCRE2_SPTR max_end;
1617 PCRE2_UCHAR type;
1618 sljit_sw length = end - begin;
1619 sljit_s32 min, max, i;
1620 
1621 /* Detect fixed iterations first. */
1622 if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)
1623   return FALSE;
1624 
1625 /* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?AB){1,3}/
1626  * Skip the check of the second part. */
1627 if (PRIVATE_DATA(end - LINK_SIZE) == 0)
1628   return TRUE;
1629 
1630 next = end;
1631 min = 1;
1632 while (1)
1633   {
1634   if (*next != *begin)
1635     break;
1636   next_end = bracketend(next);
1637   if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1638     break;
1639   next = next_end;
1640   min++;
1641   }
1642 
1643 if (min == 2)
1644   return FALSE;
1645 
1646 max = 0;
1647 max_end = next;
1648 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1649   {
1650   type = *next;
1651   while (1)
1652     {
1653     if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1654       break;
1655     next_end = bracketend(next + 2 + LINK_SIZE);
1656     if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1657       break;
1658     next = next_end;
1659     max++;
1660     }
1661 
1662   if (next[0] == type && next[1] == *begin && max >= 1)
1663     {
1664     next_end = bracketend(next + 1);
1665     if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1666       {
1667       for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1668         if (*next_end != OP_KET)
1669           break;
1670 
1671       if (i == max)
1672         {
1673         common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1674         common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1675         /* +2 the original and the last. */
1676         common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1677         if (min == 1)
1678           return TRUE;
1679         min--;
1680         max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1681         }
1682       }
1683     }
1684   }
1685 
1686 if (min >= 3)
1687   {
1688   common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1689   common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1690   common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1691   return TRUE;
1692   }
1693 
1694 return FALSE;
1695 }
1696 
/* Groups of "case" labels for switch statements over iterator opcodes.
   set_private_data_ptrs() in this file gives the *_1 groups one word of
   private data and the *_2A / *_2B groups two words. */

/* Single character iterators: one word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

/* Single character star / plus iterators: two words. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

/* Single character upto / minupto iterators (these carry an IMM2 count):
   two words. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* Character type iterators: one word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Character type star / plus iterators. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* Character type upto / minupto iterators. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1748 
/* Walks the whole pattern from common->start up to 'ccend' and assigns
   private data offsets to the opcodes which need them, storing each offset
   in common->private_data_ptrs[] at the index of the opcode.

   private_data_start is in/out: on entry the first free offset, on return
   the first offset after everything allocated here. The walk stops early
   once the running offset exceeds SLJIT_MAX_LOCAL_SIZE.

   Brackets are first offered to detect_repeat(). Character iterators
   receive a slot only when they are not inside a region ending at 'end'
   (see the comments below). */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
PCRE2_SPTR cc = common->start;
PCRE2_SPTR alternative;
/* End of the region in which character iterators get no private slot. */
PCRE2_SPTR end = NULL;
int private_data_ptr = *private_data_start;
/* space: number of words to reserve for a character iterator.
   size: code units to advance; a negative value means a character operand
     follows, which may need extra UTF units.
   bracketlen: length of a bracket header, zero for anything else. */
int space, size, bracketlen;
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  space = 0;
  size = 0;
  bracketlen = 0;
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  /* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point). */
  if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
    {
    if (detect_repeat(common, cc))
      {
      /* These brackets are converted to repeats, so no global
      based single character repeat is allowed. */
      if (cc >= end)
        end = bracketend(cc);
      }
    }
  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry right after the opcode is used as a distance to
       skip (presumably the value stored by detect_repeat()); the KET
       itself then receives one word. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    common->private_data_ptrs[cc - common->start] = 0;
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    size = 1;
    repeat_check = FALSE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    size = -2;
    space = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    size = -2;
    space = 2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    size = -(2 + IMM2_SIZE);
    space = 2;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    size = 1;
    space = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    size = 1;
    /* No slot when the repeated type is OP_ANYNL or OP_EXTUNI. */
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEUPTO:
    size = 1 + IMM2_SIZE;
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEMINUPTO:
    size = 1 + IMM2_SIZE;
    space = 2;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    size = 1 + 32 / sizeof(PCRE2_UCHAR);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
     gets a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  if (size != 0)
    {
    if (size < 0)
      {
      cc += -size;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    if (cc >= end)
      {
      /* A bracket closed by a plain OP_KET does not restrict the
         iterators inside it; any other closing opcode does. */
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
*private_data_start = private_data_ptr;
}
1933 
1934 /* Returns with a frame_types (always < 0) if no need for frame. */
get_framesize(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,BOOL recursive,BOOL * needs_control_head)1935 static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
1936 {
1937 int length = 0;
1938 int possessive = 0;
1939 BOOL stack_restore = FALSE;
1940 BOOL setsom_found = recursive;
1941 BOOL setmark_found = recursive;
1942 /* The last capture is a local variable even for recursions. */
1943 BOOL capture_last_found = FALSE;
1944 
1945 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1946 SLJIT_ASSERT(common->control_head_ptr != 0);
1947 *needs_control_head = TRUE;
1948 #else
1949 *needs_control_head = FALSE;
1950 #endif
1951 
1952 if (ccend == NULL)
1953   {
1954   ccend = bracketend(cc) - (1 + LINK_SIZE);
1955   if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1956     {
1957     possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1958     /* This is correct regardless of common->capture_last_ptr. */
1959     capture_last_found = TRUE;
1960     }
1961   cc = next_opcode(common, cc);
1962   }
1963 
1964 SLJIT_ASSERT(cc != NULL);
1965 while (cc < ccend)
1966   switch(*cc)
1967     {
1968     case OP_SET_SOM:
1969     SLJIT_ASSERT(common->has_set_som);
1970     stack_restore = TRUE;
1971     if (!setsom_found)
1972       {
1973       length += 2;
1974       setsom_found = TRUE;
1975       }
1976     cc += 1;
1977     break;
1978 
1979     case OP_MARK:
1980     case OP_COMMIT_ARG:
1981     case OP_PRUNE_ARG:
1982     case OP_THEN_ARG:
1983     SLJIT_ASSERT(common->mark_ptr != 0);
1984     stack_restore = TRUE;
1985     if (!setmark_found)
1986       {
1987       length += 2;
1988       setmark_found = TRUE;
1989       }
1990     if (common->control_head_ptr != 0)
1991       *needs_control_head = TRUE;
1992     cc += 1 + 2 + cc[1];
1993     break;
1994 
1995     case OP_RECURSE:
1996     stack_restore = TRUE;
1997     if (common->has_set_som && !setsom_found)
1998       {
1999       length += 2;
2000       setsom_found = TRUE;
2001       }
2002     if (common->mark_ptr != 0 && !setmark_found)
2003       {
2004       length += 2;
2005       setmark_found = TRUE;
2006       }
2007     if (common->capture_last_ptr != 0 && !capture_last_found)
2008       {
2009       length += 2;
2010       capture_last_found = TRUE;
2011       }
2012     cc += 1 + LINK_SIZE;
2013     break;
2014 
2015     case OP_CBRA:
2016     case OP_CBRAPOS:
2017     case OP_SCBRA:
2018     case OP_SCBRAPOS:
2019     stack_restore = TRUE;
2020     if (common->capture_last_ptr != 0 && !capture_last_found)
2021       {
2022       length += 2;
2023       capture_last_found = TRUE;
2024       }
2025     length += 3;
2026     cc += 1 + LINK_SIZE + IMM2_SIZE;
2027     break;
2028 
2029     case OP_THEN:
2030     stack_restore = TRUE;
2031     if (common->control_head_ptr != 0)
2032       *needs_control_head = TRUE;
2033     cc ++;
2034     break;
2035 
2036     default:
2037     stack_restore = TRUE;
2038     /* Fall through. */
2039 
2040     case OP_NOT_WORD_BOUNDARY:
2041     case OP_WORD_BOUNDARY:
2042     case OP_NOT_DIGIT:
2043     case OP_DIGIT:
2044     case OP_NOT_WHITESPACE:
2045     case OP_WHITESPACE:
2046     case OP_NOT_WORDCHAR:
2047     case OP_WORDCHAR:
2048     case OP_ANY:
2049     case OP_ALLANY:
2050     case OP_ANYBYTE:
2051     case OP_NOTPROP:
2052     case OP_PROP:
2053     case OP_ANYNL:
2054     case OP_NOT_HSPACE:
2055     case OP_HSPACE:
2056     case OP_NOT_VSPACE:
2057     case OP_VSPACE:
2058     case OP_EXTUNI:
2059     case OP_EODN:
2060     case OP_EOD:
2061     case OP_CIRC:
2062     case OP_CIRCM:
2063     case OP_DOLL:
2064     case OP_DOLLM:
2065     case OP_CHAR:
2066     case OP_CHARI:
2067     case OP_NOT:
2068     case OP_NOTI:
2069 
2070     case OP_EXACT:
2071     case OP_POSSTAR:
2072     case OP_POSPLUS:
2073     case OP_POSQUERY:
2074     case OP_POSUPTO:
2075 
2076     case OP_EXACTI:
2077     case OP_POSSTARI:
2078     case OP_POSPLUSI:
2079     case OP_POSQUERYI:
2080     case OP_POSUPTOI:
2081 
2082     case OP_NOTEXACT:
2083     case OP_NOTPOSSTAR:
2084     case OP_NOTPOSPLUS:
2085     case OP_NOTPOSQUERY:
2086     case OP_NOTPOSUPTO:
2087 
2088     case OP_NOTEXACTI:
2089     case OP_NOTPOSSTARI:
2090     case OP_NOTPOSPLUSI:
2091     case OP_NOTPOSQUERYI:
2092     case OP_NOTPOSUPTOI:
2093 
2094     case OP_TYPEEXACT:
2095     case OP_TYPEPOSSTAR:
2096     case OP_TYPEPOSPLUS:
2097     case OP_TYPEPOSQUERY:
2098     case OP_TYPEPOSUPTO:
2099 
2100     case OP_CLASS:
2101     case OP_NCLASS:
2102     case OP_XCLASS:
2103 
2104     case OP_CALLOUT:
2105     case OP_CALLOUT_STR:
2106 
2107     cc = next_opcode(common, cc);
2108     SLJIT_ASSERT(cc != NULL);
2109     break;
2110     }
2111 
2112 /* Possessive quantifiers can use a special case. */
2113 if (SLJIT_UNLIKELY(possessive == length))
2114   return stack_restore ? no_frame : no_stack;
2115 
2116 if (length > 0)
2117   return length + 1;
2118 return stack_restore ? no_frame : no_stack;
2119 }
2120 
init_frame(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,int stackpos,int stacktop)2121 static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
2122 {
2123 DEFINE_COMPILER;
2124 BOOL setsom_found = FALSE;
2125 BOOL setmark_found = FALSE;
2126 /* The last capture is a local variable even for recursions. */
2127 BOOL capture_last_found = FALSE;
2128 int offset;
2129 
2130 /* >= 1 + shortest item size (2) */
2131 SLJIT_UNUSED_ARG(stacktop);
2132 SLJIT_ASSERT(stackpos >= stacktop + 2);
2133 
2134 stackpos = STACK(stackpos);
2135 if (ccend == NULL)
2136   {
2137   ccend = bracketend(cc) - (1 + LINK_SIZE);
2138   if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
2139     cc = next_opcode(common, cc);
2140   }
2141 
2142 SLJIT_ASSERT(cc != NULL);
2143 while (cc < ccend)
2144   switch(*cc)
2145     {
2146     case OP_SET_SOM:
2147     SLJIT_ASSERT(common->has_set_som);
2148     if (!setsom_found)
2149       {
2150       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2151       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2152       stackpos -= (int)sizeof(sljit_sw);
2153       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2154       stackpos -= (int)sizeof(sljit_sw);
2155       setsom_found = TRUE;
2156       }
2157     cc += 1;
2158     break;
2159 
2160     case OP_MARK:
2161     case OP_COMMIT_ARG:
2162     case OP_PRUNE_ARG:
2163     case OP_THEN_ARG:
2164     SLJIT_ASSERT(common->mark_ptr != 0);
2165     if (!setmark_found)
2166       {
2167       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2168       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2169       stackpos -= (int)sizeof(sljit_sw);
2170       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2171       stackpos -= (int)sizeof(sljit_sw);
2172       setmark_found = TRUE;
2173       }
2174     cc += 1 + 2 + cc[1];
2175     break;
2176 
2177     case OP_RECURSE:
2178     if (common->has_set_som && !setsom_found)
2179       {
2180       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2181       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2182       stackpos -= (int)sizeof(sljit_sw);
2183       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2184       stackpos -= (int)sizeof(sljit_sw);
2185       setsom_found = TRUE;
2186       }
2187     if (common->mark_ptr != 0 && !setmark_found)
2188       {
2189       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2190       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2191       stackpos -= (int)sizeof(sljit_sw);
2192       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2193       stackpos -= (int)sizeof(sljit_sw);
2194       setmark_found = TRUE;
2195       }
2196     if (common->capture_last_ptr != 0 && !capture_last_found)
2197       {
2198       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2199       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2200       stackpos -= (int)sizeof(sljit_sw);
2201       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2202       stackpos -= (int)sizeof(sljit_sw);
2203       capture_last_found = TRUE;
2204       }
2205     cc += 1 + LINK_SIZE;
2206     break;
2207 
2208     case OP_CBRA:
2209     case OP_CBRAPOS:
2210     case OP_SCBRA:
2211     case OP_SCBRAPOS:
2212     if (common->capture_last_ptr != 0 && !capture_last_found)
2213       {
2214       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2215       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2216       stackpos -= (int)sizeof(sljit_sw);
2217       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2218       stackpos -= (int)sizeof(sljit_sw);
2219       capture_last_found = TRUE;
2220       }
2221     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2222     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
2223     stackpos -= (int)sizeof(sljit_sw);
2224     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
2225     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
2226     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2227     stackpos -= (int)sizeof(sljit_sw);
2228     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
2229     stackpos -= (int)sizeof(sljit_sw);
2230 
2231     cc += 1 + LINK_SIZE + IMM2_SIZE;
2232     break;
2233 
2234     default:
2235     cc = next_opcode(common, cc);
2236     SLJIT_ASSERT(cc != NULL);
2237     break;
2238     }
2239 
2240 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
2241 SLJIT_ASSERT(stackpos == STACK(stacktop));
2242 }
2243 
/* Number of temporary registers cycled through by the delayed copy. */
#define RECURSE_TMP_REG_COUNT 3

/* State of a delayed memory to memory copy. Each entry of the arrays
belongs to one temporary register: a value is loaded into the register by
delayed_mem_copy_move, and its store is emitted only when the register
is needed again or when delayed_mem_copy_finish is called. */
typedef struct delayed_mem_copy_status {
  /* Compiler which receives the emitted instructions. */
  struct sljit_compiler *compiler;
  /* Base register of the pending store, or -1 if nothing is pending. */
  int store_bases[RECURSE_TMP_REG_COUNT];
  /* Offset of the pending store (valid if store_bases is not -1). */
  int store_offsets[RECURSE_TMP_REG_COUNT];
  /* Register which holds the loaded value. */
  int tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Place where the original content of tmp_regs is preserved
  if sljit_get_register_index reports a negative index for it. */
  int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Index of the entry used by the next move. */
  int next_tmp_reg;
} delayed_mem_copy_status;
2254 
/* Prepares a delayed copy sequence. The caller must fill tmp_regs and
saved_tmp_regs before the call; this function binds the compiler, selects
the first entry and marks every entry as having no pending store. */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
int slot = 0;

status->compiler = common->compiler;
status->next_tmp_reg = 0;

while (slot < RECURSE_TMP_REG_COUNT)
  {
  SLJIT_ASSERT(status->tmp_regs[slot] >= 0);
  /* A saved register with a valid register index must be the temporary register itself. */
  SLJIT_ASSERT(sljit_get_register_index(status->saved_tmp_regs[slot]) < 0 || status->tmp_regs[slot] == status->saved_tmp_regs[slot]);

  /* -1: nothing to store yet. */
  status->store_bases[slot] = -1;
  slot++;
  }
}
2269 
/* Queues a copy of one word from [load_base + load_offset] to
[store_base + store_offset]. The load is emitted immediately, the store
is postponed until the same temporary register comes up again (or until
delayed_mem_copy_finish). Before the register is reused, its pending
store is emitted; on its first use its original content is preserved
when the saved register has a negative register index. */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_sw store_offset)
{
struct sljit_compiler *compiler = status->compiler;
const int slot = status->next_tmp_reg;
const int reg = status->tmp_regs[slot];
const int pending_base = status->store_bases[slot];

SLJIT_ASSERT(load_base > 0 && store_base > 0);

if (pending_base != -1)
  {
  /* Flush the value loaded by an earlier move. */
  OP1(SLJIT_MOV, SLJIT_MEM1(pending_base), status->store_offsets[slot], reg, 0);
  }
else if (sljit_get_register_index(status->saved_tmp_regs[slot]) < 0)
  {
  /* Preserve virtual registers. */
  OP1(SLJIT_MOV, status->saved_tmp_regs[slot], 0, reg, 0);
  }

OP1(SLJIT_MOV, reg, 0, SLJIT_MEM1(load_base), load_offset);

/* Remember where the loaded value has to go. */
status->store_bases[slot] = store_base;
status->store_offsets[slot] = store_offset;

status->next_tmp_reg = (slot + 1) % RECURSE_TMP_REG_COUNT;
}
2294 
/* Emits every store which is still pending, starting with the entry that
would be used next (so the stores keep the order of the moves), and
restores the temporary registers whose original content was preserved. */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int remaining;

for (remaining = RECURSE_TMP_REG_COUNT; remaining > 0; remaining--)
  {
  if (status->store_bases[slot] != -1)
    {
    const int reg = status->tmp_regs[slot];
    const int backup = status->saved_tmp_regs[slot];

    OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], reg, 0);

    /* Restore virtual registers. */
    if (sljit_get_register_index(backup) < 0)
      {
      OP1(SLJIT_MOV, reg, 0, backup, 0);
      }
    }

  slot = (slot + 1) % RECURSE_TMP_REG_COUNT;
  }
}
2318 
2319 #undef RECURSE_TMP_REG_COUNT
2320 
recurse_check_bit(compiler_common * common,sljit_sw bit_index)2321 static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index)
2322 {
2323 uint8_t *byte;
2324 uint8_t mask;
2325 
2326 SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0);
2327 
2328 bit_index >>= SLJIT_WORD_SHIFT;
2329 
2330 mask = 1 << (bit_index & 0x7);
2331 byte = common->recurse_bitset + (bit_index >> 3);
2332 
2333 if (*byte & mask)
2334   return FALSE;
2335 
2336 *byte |= mask;
2337 return TRUE;
2338 }
2339 
/* Returns the number of machine words which must be reserved to save the
data of the opcodes in [cc, ccend) around a recursive call. Every word is
counted only once: common->recurse_bitset is cleared first and
recurse_check_bit tells whether a word has been seen before. The count
starts at one.

Outputs:
  *needs_control_head  a control head slot is counted
  *has_quit            a PRUNE / SKIP / COMMIT / THEN style verb was found
  *has_accept          an ACCEPT verb was found

NOTE(review): the second word of every two word item is marked by a
recurse_check_bit call placed inside SLJIT_ASSERT. If that macro is
compiled out in release builds, the second bit is not marked there;
confirm that nothing depends on it. */
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
  BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept)
{
PCRE2_SPTR alternative;
int length = 1;
int class_len, priv, group;
BOOL saw_set_som = FALSE;
BOOL saw_mark = FALSE;
BOOL saw_quit = FALSE;
BOOL saw_accept = FALSE;
BOOL saw_control_head = FALSE;

memset(common->recurse_bitset, 0, common->recurse_bitset_size);

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
saw_control_head = TRUE;
#endif

/* Calculate the sum of the private machine words. */
while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    saw_set_som = TRUE;
    cc += 1;
    break;

    case OP_RECURSE:
    /* A nested recursion may touch all of these. */
    if (common->has_set_som)
      saw_set_som = TRUE;
    if (common->mark_ptr != 0)
      saw_mark = TRUE;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      length += 1;
    cc += 1 + LINK_SIZE;
    break;

    case OP_KET:
    priv = PRIVATE_DATA(cc);
    if (priv != 0)
      {
      if (recurse_check_bit(common, priv))
        length += 1;
      /* The private data of the next code unit holds the extra skip. */
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      length += 1;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    group = GET2(cc, 1 + LINK_SIZE);
    /* Start and end of the capture. */
    if (recurse_check_bit(common, OVECTOR(group << 1)))
      {
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((group << 1) + 1)));
      length += 2;
      }
    if (common->optimized_cbracket[group] == 0 && recurse_check_bit(common, OVECTOR_PRIV(group)))
      length += 1;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      length += 1;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    group = GET2(cc, 1 + LINK_SIZE);
    /* Start and end of the capture. */
    if (recurse_check_bit(common, OVECTOR(group << 1)))
      {
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((group << 1) + 1)));
      length += 2;
      }
    if (recurse_check_bit(common, OVECTOR_PRIV(group)))
      length += 1;
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      length += 1;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      length += 1;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if ((*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc)))
      length += 1;
    cc += 1 + LINK_SIZE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    priv = PRIVATE_DATA(cc);
    if (priv != 0 && recurse_check_bit(common, priv))
      length += 1;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    priv = PRIVATE_DATA(cc);
    if (priv != 0 && recurse_check_bit(common, priv))
      {
      SLJIT_ASSERT(recurse_check_bit(common, priv + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    priv = PRIVATE_DATA(cc);
    if (priv != 0 && recurse_check_bit(common, priv))
      {
      SLJIT_ASSERT(recurse_check_bit(common, priv + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    priv = PRIVATE_DATA(cc);
    if (priv != 0 && recurse_check_bit(common, priv))
      length += 1;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    priv = PRIVATE_DATA(cc);
    if (priv != 0 && recurse_check_bit(common, priv))
      {
      SLJIT_ASSERT(recurse_check_bit(common, priv + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    priv = PRIVATE_DATA(cc);
    if (priv != 0 && recurse_check_bit(common, priv))
      {
      SLJIT_ASSERT(recurse_check_bit(common, priv + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    class_len = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
    class_len = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif

    /* The number of words depends on the iterator which follows the class. */
    priv = PRIVATE_DATA(cc);
    if (priv != 0 && recurse_check_bit(common, priv))
      length += get_class_iterator_size(cc + class_len);
    cc += class_len;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    saw_mark = TRUE;
    if (common->control_head_ptr != 0)
      saw_control_head = TRUE;
    /* Every verb of this group except MARK can quit. */
    if (*cc != OP_MARK)
      saw_quit = TRUE;

    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_SKIP:
    case OP_COMMIT:
    saw_quit = TRUE;
    cc++;
    break;

    case OP_SKIP_ARG:
    saw_quit = TRUE;
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    saw_quit = TRUE;
    saw_control_head = TRUE;
    cc++;
    break;

    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    saw_accept = TRUE;
    cc++;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }
SLJIT_ASSERT(cc == ccend);

/* One word for the control head, and one for each of start of match
and mark, but the latter two only when a quitting verb is present. */
length += saw_control_head ? 1 : 0;
if (saw_quit)
  length += (saw_set_som ? 1 : 0) + (saw_mark ? 1 : 0);

*needs_control_head = saw_control_head;
*has_quit = saw_quit;
*has_accept = saw_accept;
return length;
}
2592 
/* Operation selector of copy_recurse_data. "Global" is the SLJIT_SP
relative storage, the other side is the area addressed by the base
register (STACK_TOP, or TMP2 for the swap). */
enum copy_recurse_data_types {
  /* Global -> stack: private, shared and kept shared words. */
  recurse_copy_from_global,
  /* Stack -> global: private words only. */
  recurse_copy_private_to_global,
  /* Stack -> global: shared and kept shared words. */
  recurse_copy_shared_to_global,
  /* Stack -> global: kept shared words only. */
  recurse_copy_kept_shared_to_global,
  /* Exchange of private and shared words (kept shared ones are skipped). */
  recurse_swap_global
};
2600 
/* Emits the code which transfers the recursion data of the opcodes in
[cc, ccend) between the SLJIT_SP relative storage and the words in
[STACK(stackptr), STACK(stacktop)) of the base register.

type selects the direction and the word groups which are transferred
(see enum copy_recurse_data_types); words of the groups which are not
selected are skipped, so the layout is the same for every type:
  - private words: private data of brackets / iterators, control head;
  - shared words: capture start / end pairs and the last capture;
  - kept shared words: start of match and mark, used only if has_quit.
The first word always belongs to common->recursive_head_ptr.

The opcode walk, and the use of recurse_check_bit to process each word
only once, must match get_recurse_data_length, otherwise the area is
not filled exactly (see the assertion at the end).

NOTE(review): the second word of two word items is marked by
recurse_check_bit calls placed inside SLJIT_ASSERT; if that macro is
compiled out in release builds those bits are not marked there. */
static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
  int type, int stackptr, int stacktop, BOOL has_quit)
{
delayed_mem_copy_status status;
PCRE2_SPTR alternative;
sljit_sw private_srcw[2];
sljit_sw shared_srcw[3];
sljit_sw kept_shared_srcw[2];
int private_count, shared_count, kept_shared_count;
int from_sp, base_reg, offset, i;

memset(common->recurse_bitset, 0, common->recurse_bitset_size);

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
recurse_check_bit(common, common->control_head_ptr);
#endif

/* from_sp: the SLJIT_SP relative storage is the source of the copy. */
switch (type)
  {
  case recurse_copy_from_global:
  from_sp = TRUE;
  base_reg = STACK_TOP;
  break;

  case recurse_copy_private_to_global:
  case recurse_copy_shared_to_global:
  case recurse_copy_kept_shared_to_global:
  from_sp = FALSE;
  base_reg = STACK_TOP;
  break;

  default:
  SLJIT_ASSERT(type == recurse_swap_global);
  from_sp = FALSE;
  base_reg = TMP2;
  break;
  }

stackptr = STACK(stackptr);
stacktop = STACK(stacktop);

/* Select the temporary registers of the delayed copy. TMP2 cannot be
used as a temporary when it is the base register. */
status.tmp_regs[0] = TMP1;
status.saved_tmp_regs[0] = TMP1;

if (base_reg != TMP2)
  {
  status.tmp_regs[1] = TMP2;
  status.saved_tmp_regs[1] = TMP2;
  }
else
  {
  status.saved_tmp_regs[1] = RETURN_ADDR;
  if (HAS_VIRTUAL_REGISTERS)
    status.tmp_regs[1] = STR_PTR;
  else
    status.tmp_regs[1] = RETURN_ADDR;
  }

status.saved_tmp_regs[2] = TMP3;
if (HAS_VIRTUAL_REGISTERS)
  status.tmp_regs[2] = STR_END;
else
  status.tmp_regs[2] = TMP3;

delayed_mem_copy_init(&status, common);

/* The recursive head is handled like a private word. For a swap both
moves are queued: the loads are emitted before the delayed stores. */
if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
  {
  SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);

  if (!from_sp)
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);

  if (from_sp || type == recurse_swap_global)
    delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
  }

stackptr += sizeof(sljit_sw);

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
/* NOTE(review): unlike the private word handling elsewhere in this
function, recurse_copy_kept_shared_to_global is not excluded here;
confirm whether this debug-only path is intended. */
if (type != recurse_copy_shared_to_global)
  {
  if (!from_sp)
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);

  if (from_sp || type == recurse_swap_global)
    delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
  }

stackptr += sizeof(sljit_sw);
#endif

while (cc < ccend)
  {
  /* Collect the words of the current opcode first. */
  private_count = 0;
  shared_count = 0;
  kept_shared_count = 0;

  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    if (has_quit && recurse_check_bit(common, OVECTOR(0)))
      {
      kept_shared_srcw[0] = OVECTOR(0);
      kept_shared_count = 1;
      }
    cc += 1;
    break;

    case OP_RECURSE:
    if (has_quit)
      {
      if (common->has_set_som && recurse_check_bit(common, OVECTOR(0)))
        {
        kept_shared_srcw[0] = OVECTOR(0);
        kept_shared_count = 1;
        }
      if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr))
        {
        kept_shared_srcw[kept_shared_count] = common->mark_ptr;
        kept_shared_count++;
        }
      }
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      {
      shared_srcw[0] = common->capture_last_ptr;
      shared_count = 1;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_KET:
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0)
      {
      if (recurse_check_bit(common, private_srcw[0]))
        private_count = 1;
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    private_srcw[0] = PRIVATE_DATA(cc);
    if (recurse_check_bit(common, private_srcw[0]))
      private_count = 1;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    offset = GET2(cc, 1 + LINK_SIZE);
    shared_srcw[0] = OVECTOR(offset << 1);
    if (recurse_check_bit(common, shared_srcw[0]))
      {
      shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
      shared_count = 2;
      }

    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      {
      shared_srcw[shared_count] = common->capture_last_ptr;
      shared_count++;
      }

    if (common->optimized_cbracket[offset] == 0)
      {
      private_srcw[0] = OVECTOR_PRIV(offset);
      if (recurse_check_bit(common, private_srcw[0]))
        private_count = 1;
      }

    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    offset = GET2(cc, 1 + LINK_SIZE);
    shared_srcw[0] = OVECTOR(offset << 1);
    if (recurse_check_bit(common, shared_srcw[0]))
      {
      shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
      shared_count = 2;
      }

    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      {
      shared_srcw[shared_count] = common->capture_last_ptr;
      shared_count++;
      }

    private_srcw[0] = PRIVATE_DATA(cc);
    if (recurse_check_bit(common, private_srcw[0]))
      private_count = 1;

    offset = OVECTOR_PRIV(offset);
    if (recurse_check_bit(common, offset))
      {
      private_srcw[private_count] = offset;
      private_count++;
      }
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      private_srcw[0] = PRIVATE_DATA(cc);
      if (recurse_check_bit(common, private_srcw[0]))
        private_count = 1;
      }
    cc += 1 + LINK_SIZE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      private_count = 1;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      {
      private_count = 2;
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
      }
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      {
      private_count = 2;
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
      }
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      private_count = 1;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      {
      private_count = 2;
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
      }
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      {
      private_count = 2;
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
      }
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
    i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif
    if (PRIVATE_DATA(cc) != 0)
      switch(get_class_iterator_size(cc + i))
        {
        case 1:
        /* get_recurse_data_length reserves one word for this iterator,
        so the word must be marked and transferred here as well. */
        private_srcw[0] = PRIVATE_DATA(cc);
        if (recurse_check_bit(common, private_srcw[0]))
          private_count = 1;
        break;

        case 2:
        private_srcw[0] = PRIVATE_DATA(cc);
        if (recurse_check_bit(common, private_srcw[0]))
          {
          private_count = 2;
          private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
          SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
          }
        break;

        default:
        SLJIT_UNREACHABLE();
        break;
        }
    cc += i;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    if (has_quit && recurse_check_bit(common, common->mark_ptr))
      {
      kept_shared_srcw[0] = common->mark_ptr;
      kept_shared_count = 1;
      }
    if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr))
      {
      private_srcw[0] = common->control_head_ptr;
      private_count = 1;
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    if (recurse_check_bit(common, common->control_head_ptr))
      {
      private_srcw[0] = common->control_head_ptr;
      private_count = 1;
      }
    cc++;
    break;

    default:
    /* No recursion data: go directly to the next opcode. */
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    continue;
    }

  /* Transfer (or skip) the collected words: private words first,
  then the shared ones, finally the kept shared ones. */
  if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
    {
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);

    for (i = 0; i < private_count; i++)
      {
      SLJIT_ASSERT(private_srcw[i] != 0);

      if (!from_sp)
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);

      if (from_sp || type == recurse_swap_global)
        delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);

      stackptr += sizeof(sljit_sw);
      }
    }
  else
    stackptr += sizeof(sljit_sw) * private_count;

  if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
    {
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);

    for (i = 0; i < shared_count; i++)
      {
      SLJIT_ASSERT(shared_srcw[i] != 0);

      if (!from_sp)
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);

      if (from_sp || type == recurse_swap_global)
        delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);

      stackptr += sizeof(sljit_sw);
      }
    }
  else
    stackptr += sizeof(sljit_sw) * shared_count;

  if (type != recurse_copy_private_to_global && type != recurse_swap_global)
    {
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);

    for (i = 0; i < kept_shared_count; i++)
      {
      SLJIT_ASSERT(kept_shared_srcw[i] != 0);

      if (!from_sp)
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);

      if (from_sp || type == recurse_swap_global)
        delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);

      stackptr += sizeof(sljit_sw);
      }
    }
  else
    stackptr += sizeof(sljit_sw) * kept_shared_count;
  }

/* Every reserved word must have been visited. */
SLJIT_ASSERT(cc == ccend && stackptr == stacktop);

delayed_mem_copy_finish(&status);
}
3031 
set_then_offsets(compiler_common * common,PCRE2_SPTR cc,sljit_u8 * current_offset)3032 static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
3033 {
3034 PCRE2_SPTR end = bracketend(cc);
3035 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
3036 
3037 /* Assert captures then. */
3038 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA)
3039   current_offset = NULL;
3040 /* Conditional block does not. */
3041 if (*cc == OP_COND || *cc == OP_SCOND)
3042   has_alternatives = FALSE;
3043 
3044 cc = next_opcode(common, cc);
3045 if (has_alternatives)
3046   current_offset = common->then_offsets + (cc - common->start);
3047 
3048 while (cc < end)
3049   {
3050   if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
3051     cc = set_then_offsets(common, cc, current_offset);
3052   else
3053     {
3054     if (*cc == OP_ALT && has_alternatives)
3055       current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
3056     if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
3057       *current_offset = 1;
3058     cc = next_opcode(common, cc);
3059     }
3060   }
3061 
3062 return end;
3063 }
3064 
/* Drop the CASE_ITERATOR_* helper macros so that they do not leak into the
rest of the file. */
3065 #undef CASE_ITERATOR_PRIVATE_DATA_1
3066 #undef CASE_ITERATOR_PRIVATE_DATA_2A
3067 #undef CASE_ITERATOR_PRIVATE_DATA_2B
3068 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
3069 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
3070 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3071 
is_powerof2(unsigned int value)3072 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
3073 {
3074 return (value & (value - 1)) == 0;
3075 }
3076 
set_jumps(jump_list * list,struct sljit_label * label)3077 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
3078 {
3079 while (list)
3080   {
3081   /* sljit_set_label is clever enough to do nothing
3082   if either the jump or the label is NULL. */
3083   SET_LABEL(list->jump, label);
3084   list = list->next;
3085   }
3086 }
3087 
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)3088 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
3089 {
3090 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
3091 if (list_item)
3092   {
3093   list_item->next = *list;
3094   list_item->jump = jump;
3095   *list = list_item;
3096   }
3097 }
3098 
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
/* Records a stub on common->stubs: 'start' is the jump that enters the
stub, and a label emitted at the current position is where the stub
continues afterwards (see flush_stubs). Nothing is recorded, and no label
is emitted, if the node cannot be allocated. */
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (stub == NULL)
  return;

stub->start = start;
stub->quit = LABEL();
stub->next = common->stubs;
common->stubs = stub;
}
3112 
static void flush_stubs(compiler_common *common)
{
/* Emits the body of every pending stub: the entry jump lands here, a fast
call is queued on common->stackalloc, and execution then jumps back to the
label saved by add_stub(). The pending list is emptied afterwards. */
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
3127 
count_match(compiler_common * common)3128 static SLJIT_INLINE void count_match(compiler_common *common)
3129 {
3130 DEFINE_COMPILER;
3131 
3132 OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
3133 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
3134 }
3135 
allocate_stack(compiler_common * common,int size)3136 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
3137 {
3138 /* May destroy all locals and registers except TMP2. */
3139 DEFINE_COMPILER;
3140 
3141 SLJIT_ASSERT(size > 0);
3142 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
3143 #ifdef DESTROY_REGISTERS
3144 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
3145 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3146 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3147 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
3148 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
3149 #endif
3150 add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
3151 }
3152 
free_stack(compiler_common * common,int size)3153 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
3154 {
3155 DEFINE_COMPILER;
3156 
3157 SLJIT_ASSERT(size > 0);
3158 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
3159 }
3160 
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
/* Allocates 'size' bytes plus one leading word. The leading word links the
chunk to the previous head of common->read_only_data_head, and the chunk
becomes the new head. Returns a pointer just past the link word, or NULL
when the compiler is already in an error state or the allocation fails (in
the latter case the compiler memory error is set). */
DEFINE_COMPILER;
sljit_uw *chunk;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

chunk = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
if (SLJIT_UNLIKELY(chunk == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

/* Prepend the chunk to the singly linked list of data blocks. */
*(void**)chunk = common->read_only_data_head;
common->read_only_data_head = (void *)chunk;

return chunk + 1;
}
3180 
reset_ovector(compiler_common * common,int length)3181 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
3182 {
3183 DEFINE_COMPILER;
3184 struct sljit_label *loop;
3185 sljit_s32 i;
3186 
3187 /* At this point we can freely use all temporary registers. */
3188 SLJIT_ASSERT(length > 1);
3189 /* TMP1 returns with begin - 1. */
3190 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
3191 if (length < 8)
3192   {
3193   for (i = 1; i < length; i++)
3194     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
3195   }
3196 else
3197   {
3198   if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3199     {
3200     GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
3201     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3202     loop = LABEL();
3203     sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
3204     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3205     JUMPTO(SLJIT_NOT_ZERO, loop);
3206     }
3207   else
3208     {
3209     GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
3210     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3211     loop = LABEL();
3212     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
3213     OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
3214     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3215     JUMPTO(SLJIT_NOT_ZERO, loop);
3216     }
3217   }
3218 }
3219 
/* Emits code that stores zero into every machine word of the local area
between common->early_fail_start_ptr (inclusive) and
common->early_fail_end_ptr (exclusive); both are offsets from SLJIT_SP.
Depending on the size of the area this is a single store, an unrolled run of
stores (up to six words), or a loop clearing three words per iteration
followed by at most two trailing stores. Uses TMP1 and TMP2 in the loop case
and may load zero into TMP3. */
reset_early_fail(compiler_common * common)3220 static SLJIT_INLINE void reset_early_fail(compiler_common *common)
3221 {
3222 DEFINE_COMPILER;
3223 sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
3224 sljit_u32 uncleared_size;
3225 sljit_s32 src = SLJIT_IMM;
3226 sljit_s32 i;
3227 struct sljit_label *loop;
3228 
3229 SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);
3230 
/* Exactly one word: a single store of an immediate zero. */
3231 if (size == sizeof(sljit_sw))
3232   {
3233   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
3234   return;
3235   }
3236 
/* For several stores, use TMP3 as the zero source when it maps to a real
register and the CPU reports no dedicated zero register; otherwise the
stores keep using an immediate zero. */
3237 if (sljit_get_register_index(TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
3238   {
3239   OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
3240   src = TMP3;
3241   }
3242 
/* Up to six words: fully unrolled stores. */
3243 if (size <= 6 * sizeof(sljit_sw))
3244   {
3245   for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw))
3246     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0);
3247   return;
3248   }
3249 
/* Larger areas: TMP1 is the cursor, starting at the first word. */
3250 GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);
3251 
/* Bytes (0, 1 or 2 words) left over after the three-words-per-pass loop. */
3252 uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);
3253 
/* TMP2 is the address at which the loop stops. */
3254 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);
3255 
/* Each pass clears the word at TMP1, advances TMP1 by three words, then
clears the two words just behind the new TMP1. */
3256 loop = LABEL();
3257 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3258 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3259 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * (sljit_sw)sizeof(sljit_sw), src, 0);
3260 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * (sljit_sw)sizeof(sljit_sw), src, 0);
3261 CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);
3262 
/* Clear the remaining one or two words, if any. */
3263 if (uncleared_size >= sizeof(sljit_sw))
3264   OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3265 
3266 if (uncleared_size >= 2 * sizeof(sljit_sw))
3267   OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0);
3268 }
3269 
do_reset_match(compiler_common * common,int length)3270 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
3271 {
3272 DEFINE_COMPILER;
3273 struct sljit_label *loop;
3274 int i;
3275 
3276 SLJIT_ASSERT(length > 1);
3277 /* OVECTOR(1) contains the "string begin - 1" constant. */
3278 if (length > 2)
3279   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3280 if (length < 8)
3281   {
3282   for (i = 2; i < length; i++)
3283     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
3284   }
3285 else
3286   {
3287   if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3288     {
3289     GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
3290     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3291     loop = LABEL();
3292     sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
3293     OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3294     JUMPTO(SLJIT_NOT_ZERO, loop);
3295     }
3296   else
3297     {
3298     GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
3299     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3300     loop = LABEL();
3301     OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
3302     OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
3303     OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3304     JUMPTO(SLJIT_NOT_ZERO, loop);
3305     }
3306   }
3307 
3308 if (!HAS_VIRTUAL_REGISTERS)
3309   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));
3310 else
3311   OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
3312 
3313 if (common->mark_ptr != 0)
3314   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
3315 if (common->control_head_ptr != 0)
3316   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
3317 if (HAS_VIRTUAL_REGISTERS)
3318   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
3319 
3320 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3321 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
3322 }
3323 
do_search_mark(sljit_sw * current,PCRE2_SPTR skip_arg)3324 static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
3325 {
3326 while (current != NULL)
3327   {
3328   switch (current[1])
3329     {
3330     case type_then_trap:
3331     break;
3332 
3333     case type_mark:
3334     if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
3335       return current[3];
3336     break;
3337 
3338     default:
3339     SLJIT_UNREACHABLE();
3340     break;
3341     }
3342   SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
3343   current = (sljit_sw*)current[0];
3344   }
3345 return 0;
3346 }
3347 
/* Emits the code that publishes the result of a successful match:
- STR_PTR is stored into OVECTOR(1), whose previous value is kept in
  SLJIT_S2;
- the start_ptr local (and the mark local, if any) is copied into the
  jit_arguments structure;
- 'oveccount' words are copied from the OVECTOR locals into
  match_data->ovector, converted from pointers to code unit offsets
  relative to 'begin';
- SLJIT_RETURN_REG receives the return value: 1 when topbracket <= 1,
  otherwise a count obtained by scanning the OVECTOR locals downwards from
  the top bracket. */
copy_ovector(compiler_common * common,int topbracket)3348 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
3349 {
3350 DEFINE_COMPILER;
3351 struct sljit_label *loop;
3352 BOOL has_pre;
3353 
3354 /* At this point we can freely use all registers. */
/* Keep the old OVECTOR(1) value in S2 for the return value scan below, then
record the match end. */
3355 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3356 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
3357 
/* Both variants below leave: R1 = oveccount, R2 = address one PCRE2_SIZE
before match_data->ovector. They differ only in how the arguments
structure is addressed. */
3358 if (HAS_VIRTUAL_REGISTERS)
3359   {
3360   OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
3361   OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3362   if (common->mark_ptr != 0)
3363     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
3364   OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
3365   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
3366   if (common->mark_ptr != 0)
3367     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
3368   OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
3369     SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
3370   }
3371 else
3372   {
3373   OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3374   OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
3375   if (common->mark_ptr != 0)
3376     OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
3377   OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
3378   OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
3379   if (common->mark_ptr != 0)
3380     OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
3381   OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
3382   }
3383 
/* Query only (SLJIT_MEM_SUPP): is a pre-updating load available? */
3384 has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
3385 
/* S0 = source cursor (one word early when the pre-updating form is used),
R0 = subject begin. */
3386 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
3387 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
3388 
/* Copy loop: one ovector word per iteration, R1 counts down to zero. */
3389 loop = LABEL();
3390 
3391 if (has_pre)
3392   sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
3393 else
3394   {
3395   OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
3396   OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
3397   }
3398 
/* Advance the destination, then turn the pointer into a byte offset. */
3399 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
3400 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
3401 /* Copy the integer value to the output buffer */
/* For 16 and 32 bit code units the byte offset is scaled down to a code
unit offset first. */
3402 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3403 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
3404 #endif
3405 
/* The store width follows sizeof(PCRE2_SIZE). */
3406 SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
3407 OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
3408 
3409 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3410 JUMPTO(SLJIT_NOT_ZERO, loop);
3411 
3412 /* Calculate the return value, which is the maximum ovector value. */
/* The scan walks downwards two words at a time, starting at the top
bracket, and stops at the first word that differs from S2 (the old
OVECTOR(1) value); R1 is decremented once per visited word. */
3413 if (topbracket > 1)
3414   {
3415   if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
3416     {
3417     GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
3418     OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3419 
3420     /* OVECTOR(0) is never equal to SLJIT_S2. */
3421     loop = LABEL();
3422     sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
3423     OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3424     CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3425     OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3426     }
3427   else
3428     {
3429     GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
3430     OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3431 
3432     /* OVECTOR(0) is never equal to SLJIT_S2. */
3433     loop = LABEL();
3434     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
3435     OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
3436     OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3437     CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3438     OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3439     }
3440   }
3441 else
3442   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
3443 }
3444 
return_with_partial_match(compiler_common * common,struct sljit_label * quit)3445 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
3446 {
3447 DEFINE_COMPILER;
3448 sljit_s32 mov_opcode;
3449 sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1;
3450 
3451 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
3452 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
3453   && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));
3454 
3455 if (arguments_reg != ARGUMENTS)
3456   OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);
3457 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
3458   common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
3459 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);
3460 
3461 /* Store match begin and end. */
3462 OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin));
3463 OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
3464 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));
3465 
3466 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
3467 
3468 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
3469 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3470 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
3471 #endif
3472 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);
3473 
3474 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
3475 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3476 OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
3477 #endif
3478 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);
3479 
3480 JUMPTO(SLJIT_JUMP, quit);
3481 }
3482 
check_start_used_ptr(compiler_common * common)3483 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
3484 {
3485 /* May destroy TMP1. */
3486 DEFINE_COMPILER;
3487 struct sljit_jump *jump;
3488 
3489 if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3490   {
3491   /* The value of -1 must be kept for start_used_ptr! */
3492   OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
3493   /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
3494   is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
3495   jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
3496   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3497   JUMPHERE(jump);
3498   }
3499 else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
3500   {
3501   jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3502   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3503   JUMPHERE(jump);
3504   }
3505 }
3506 
char_has_othercase(compiler_common * common,PCRE2_SPTR cc)3507 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
3508 {
3509 /* Detects if the character has an othercase. */
3510 unsigned int c;
3511 
3512 #ifdef SUPPORT_UNICODE
3513 if (common->utf || common->ucp)
3514   {
3515   if (common->utf)
3516     {
3517     GETCHAR(c, cc);
3518     }
3519   else
3520     c = *cc;
3521 
3522   if (c > 127)
3523     return c != UCD_OTHERCASE(c);
3524 
3525   return common->fcc[c] != c;
3526   }
3527 else
3528 #endif
3529   c = *cc;
3530 return MAX_255(c) ? common->fcc[c] != c : FALSE;
3531 }
3532 
char_othercase(compiler_common * common,unsigned int c)3533 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
3534 {
3535 /* Returns with the othercase. */
3536 #ifdef SUPPORT_UNICODE
3537 if ((common->utf || common->ucp) && c > 127)
3538   return UCD_OTHERCASE(c);
3539 #endif
3540 return TABLE_GET(c, common->fcc, c);
3541 }
3542 
/* Checks whether the character at cc and its other case differ in exactly
one bit. Returns 0 when they differ in more than one bit. Otherwise the
result packs two values: the low 8 bits hold the differing bit as a mask
within one byte, and the bits above hold an index that says which byte
of the encoded character the mask applies to.
NOTE(review): the index appears to be a byte offset from cc - confirm
against the callers. The character must have an other case (asserted). */
char_get_othercase_bit(compiler_common * common,PCRE2_SPTR cc)3543 static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
3544 {
3545 /* Detects if the character and its othercase has only 1 bit difference. */
3546 unsigned int c, oc, bit;
3547 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
3548 int n;
3549 #endif
3550 
/* Fetch the character and its other case, using the same lookup rules as
char_othercase(). */
3551 #ifdef SUPPORT_UNICODE
3552 if (common->utf || common->ucp)
3553   {
3554   if (common->utf)
3555     {
3556     GETCHAR(c, cc);
3557     }
3558   else
3559     c = *cc;
3560 
3561   if (c <= 127)
3562     oc = common->fcc[c];
3563   else
3564     oc = UCD_OTHERCASE(c);
3565   }
3566 else
3567   {
3568   c = *cc;
3569   oc = TABLE_GET(c, common->fcc, c);
3570   }
3571 #else
3572 c = *cc;
3573 oc = TABLE_GET(c, common->fcc, c);
3574 #endif
3575 
3576 SLJIT_ASSERT(c != oc);
3577 
3578 bit = c ^ oc;
3579 /* Optimized for English alphabet. */
3580 if (c <= 127 && bit == 0x20)
3581   return (0 << 8) | 0x20;
3582 
3583 /* Since c != oc, they must have at least 1 bit difference. */
3584 if (!is_powerof2(bit))
3585   return 0;
3586 
3587 #if PCRE2_CODE_UNIT_WIDTH == 8
3588 
3589 #ifdef SUPPORT_UNICODE
/* Multi-byte UTF-8 character: every byte after the first carries 6 bits of
the code point, so step back one byte per 6 bits shifted out, starting
from the last byte (index GET_EXTRALEN(*cc)). */
3590 if (common->utf && c > 127)
3591   {
3592   n = GET_EXTRALEN(*cc);
3593   while ((bit & 0x3f) == 0)
3594     {
3595     n--;
3596     bit >>= 6;
3597     }
3598   return (n << 8) | bit;
3599   }
3600 #endif /* SUPPORT_UNICODE */
3601 return (0 << 8) | bit;
3602 
3603 #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3604 
3605 #ifdef SUPPORT_UNICODE
/* Character above 0xffff in UTF mode: a difference in bit 10 or above is
shifted down by 10 and handled by the common return below (index 0 or 1);
a difference in the low 10 bits yields index 2 or 3. */
3606 if (common->utf && c > 65535)
3607   {
3608   if (bit >= (1u << 10))
3609     bit >>= 10;
3610   else
3611     return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
3612   }
3613 #endif /* SUPPORT_UNICODE */
3614 return (bit < 256) ? ((0u << 8) | bit) : ((1u << 8) | (bit >> 8));
3615 
3616 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
3617 }
3618 
static void check_partial(compiler_common *common, BOOL force)
{
/* Emits the partial match check for the current position; nothing is
emitted in complete matching mode. In soft partial mode a hit is recorded
by storing 0 into the hit_start local; otherwise control is transferred to
the partial match exit. Unless 'force' is set or empty partial matches are
allowed, the action is skipped when start_used_ptr >= STR_PTR; failing
that, soft mode skips it when start_used_ptr is -1. Does not modify
registers. */
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;
BOOL soft_mode = (common->mode == PCRE2_JIT_PARTIAL_SOFT);

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

if (!(force || common->allow_empty_partial))
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
else if (soft_mode)
  skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);

if (soft_mode)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

if (skip != NULL)
  JUMPHERE(skip);
}
3648 
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Emits the end-of-subject test. In complete mode a jump is added to
end_reached when STR_PTR >= STR_END. In the partial modes, once the end is
reached: if start_used_ptr >= STR_PTR a jump is added to end_reached;
otherwise soft mode stores 0 into the hit_start local and also jumps to
end_reached, while hard mode goes to the partial match exit. Does not
affect registers; usually used in a tight spot. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

/* Common to both partial modes. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(not_at_end);
}
3678 
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
/* Emits the end-of-subject test used before reading a character. In
complete mode a jump is added to backtracks when STR_PTR >= STR_END. In
the partial modes, once the end is reached: a backtrack jump is added when
the partial match must be ignored (start_used_ptr >= STR_PTR unless empty
partial matches are allowed, in which case soft mode tests start_used_ptr
against -1); otherwise soft mode stores 0 into the hit_start local and
backtracks, while hard mode goes to the partial match exit. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;
BOOL soft_mode = (common->mode == PCRE2_JIT_PARTIAL_SOFT);

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

if (!common->allow_empty_partial)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
else if (soft_mode)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));

if (soft_mode)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(not_at_end);
}
3711 
static void process_partial_match(compiler_common *common)
{
/* Emits the partial match action without an end-of-subject test. Soft
mode: store 0 into the hit_start local unless start_used_ptr >= STR_PTR.
Hard mode: go to the partial match exit when start_used_ptr < STR_PTR.
Nothing is emitted in any other mode. */
DEFINE_COMPILER;
struct sljit_jump *skip;

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  JUMPHERE(skip);
  return;
  }

if (common->mode != PCRE2_JIT_PARTIAL_HARD)
  return;

if (common->partialmatchlabel == NULL)
  {
  /* The exit label does not exist yet: queue the jump for later. */
  add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
  return;
  }

CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
}
3732 
static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
{
/* Emits a jump to 'label' that is taken while STR_PTR is below STR_END;
the code that follows the jump (end of subject reached) performs the
partial match action of process_partial_match(). */
DEFINE_COMPILER;

CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);

process_partial_match(common);
}
3740 
/* Emits code that reads the character at STR_PTR into TMP1 without
advancing STR_PTR.
  max        - largest character value the caller cares about; when it is
               below the first value that needs multi-unit decoding or
               validation, only the plain code unit load is emitted.
  dst, dstw  - operand used to save STR_PTR while a decoding helper runs.
  backtracks - if not NULL and invalid UTF handling is enabled, a jump is
               added to this list for an invalid character. */
peek_char(compiler_common * common,sljit_u32 max,sljit_s32 dst,sljit_sw dstw,jump_list ** backtracks)3741 static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
3742 {
3743 /* Reads the character into TMP1, keeps STR_PTR.
3744 Does not check STR_END. TMP2, dst, RETURN_ADDR Destroyed. */
3745 DEFINE_COMPILER;
3746 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3747 struct sljit_jump *jump;
3748 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
3749 
/* Not every build configuration uses every argument. */
3750 SLJIT_UNUSED_ARG(max);
3751 SLJIT_UNUSED_ARG(dst);
3752 SLJIT_UNUSED_ARG(dstw);
3753 SLJIT_UNUSED_ARG(backtracks);
3754 
3755 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3756 
3757 #ifdef SUPPORT_UNICODE
3758 #if PCRE2_CODE_UNIT_WIDTH == 8
3759 if (common->utf)
3760   {
3761   if (max < 128) return;
3762 
/* Bytes below 0x80 are complete characters. Otherwise save STR_PTR in dst,
step past the first byte, call the decoding helper and restore STR_PTR. */
3763   jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
3764   OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
3765   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3766   add_jump(compiler, common->invalid_utf ? &common->utfreadchar_invalid : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
3767   OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
3768   if (backtracks && common->invalid_utf)
3769     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3770   JUMPHERE(jump);
3771   }
3772 #elif PCRE2_CODE_UNIT_WIDTH == 16
3773 if (common->utf)
3774   {
3775   if (max < 0xd800) return;
3776 
/* TMP2 = code unit - 0xd800, so an unsigned compare can classify it. */
3777   OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3778 
3779   if (common->invalid_utf)
3780     {
/* Code units outside 0xd800..0xdfff need no further work; the rest go
through the validating helper with STR_PTR saved in dst. */
3781     jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
3782     OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
3783     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3784     add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
3785     OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
3786     if (backtracks && common->invalid_utf)
3787       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3788     }
3789   else
3790     {
3791     /* TMP2 contains the high surrogate. */
/* Combine it inline with the following code unit:
(high - 0xd800) << 10, plus (low - 0xdc00), plus 0x10000. */
3792     jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
3793     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3794     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
3795     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
3796     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3797     }
3798 
3799   JUMPHERE(jump);
3800   }
3801 #elif PCRE2_CODE_UNIT_WIDTH == 32
3802 if (common->invalid_utf)
3803   {
3804   if (max < 0xd800) return;
3805 
/* A value is invalid when it is 0x110000 or above, or lies in
0xd800..0xdfff. With a backtrack list such values jump to it; without one
TMP1 is replaced by INVALID_UTF_CHAR. */
3806   if (backtracks != NULL)
3807     {
3808     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3809     add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
3810     add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
3811     }
3812   else
3813     {
3814     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3815     OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
3816     CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
3817     OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
3818     CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
3819     }
3820   }
3821 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
3822 #endif /* SUPPORT_UNICODE */
3823 }
3824 
static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
{
/* Emits code that reads the character just before STR_PTR into TMP1
without moving STR_PTR. TMP2 must contain the start of the subject buffer.
Affects TMP1, TMP2, and RETURN_ADDR.

  max        - largest character value the caller cares about; in the 8
               and 16 bit UTF modes only the plain code unit load is
               emitted when max is below the first multi-unit value.
  backtracks - optional list that receives a jump for an invalid character
               when invalid UTF handling is enabled. It may be NULL in
               every code unit width; in the 32 bit width TMP1 is then set
               to INVALID_UTF_CHAR instead, as peek_char() does. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

/* Not every build configuration uses every argument. */
SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  /* Bytes below 0x80 are complete characters; everything else is decoded
  by a helper routine. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  if (common->invalid_utf)
    {
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  if (common->invalid_utf)
    {
    /* Code units below 0xd800 need no validation. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
    /* TMP2 contains the low surrogate. Combine it with the code unit
    before it: ((high - 0xd800) << 10) + (low - 0xdc00) + 0x10000. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  /* A value is invalid when it is 0x110000 or above, or lies in
  0xd800..0xdfff (tested through TMP2 = value - 0xd800). */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (backtracks != NULL)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    }
  else
    {
    /* add_jump() dereferences its list argument, so without a backtrack
    list the invalid value is replaced by INVALID_UTF_CHAR instead. */
    OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
    CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3892 
/* Option bits for the "options" argument of read_char(). */

/* Move STR_PTR past the whole character even when the character is outside
the requested range and its value is therefore not decoded. */
#define READ_CHAR_UPDATE_STR_PTR 0x1
/* In invalid UTF mode, decode through the utfreadnewline_invalid helper
instead of utfreadchar_invalid. */
#define READ_CHAR_UTF8_NEWLINE 0x2
/* Both of the above: the combination used when reading newlines. */
#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
/* Do not emit the invalid UTF checking path even if common->invalid_utf is
set (presumably the caller already knows the character is well formed). */
#define READ_CHAR_VALID_UTF 0x4
3897 
static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
  jump_list **backtracks, sljit_u32 options)
{
/* Emits code which loads the character at STR_PTR into TMP1 and steps
STR_PTR over (at least) its first code unit. The exact value is produced
only for characters with min <= c <= max; any other character yields some
value outside of that range. STR_END is not checked. The READ_CHAR_* bits in
"options" control pointer update and invalid UTF handling. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *skip;
/* TRUE when STR_PTR has to end up after the whole character. */
const BOOL update_str_ptr = (options & READ_CHAR_UPDATE_STR_PTR) != 0;
/* TRUE when the input has to be validated while decoding. */
const BOOL check_invalid = common->invalid_utf && (options & READ_CHAR_VALID_UTF) == 0;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *out_of_range;
#endif

SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(options);
SLJIT_ASSERT(min <= max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* Only single byte characters are interesting and the pointer does
  not need to be corrected: the first byte is enough. */
  if (max < 128 && !update_str_ptr) return;

  if (check_invalid)
    {
    skip = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);

    add_jump(compiler, (options & READ_CHAR_UTF8_NEWLINE) ?
      &common->utfreadnewline_invalid : &common->utfreadchar_invalid,
      JUMP(SLJIT_FAST_CALL));

    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    JUMPHERE(skip);
    return;
    }

  /* Bytes below 0xc0 are not decoded any further. */
  skip = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  if (min >= 0x10000)
    {
    /* Only four byte sequences (lead byte 0xf0-0xf7) are decoded. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    out_of_range = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(out_of_range);
    /* The length read from utf8_table4 is applied on both paths. */
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (min >= 0x800 && max <= 0xffff)
    {
    /* Only three byte sequences (lead byte 0xe0-0xef) are decoded. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    out_of_range = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(out_of_range);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (max >= 0x800)
    {
    /* Mixed lengths: use the generic decoder. */
    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
    }
  else if (max < 128)
    {
    /* Reached only with READ_CHAR_UPDATE_STR_PTR: skip the remaining
    bytes, the value itself is not needed. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  else
    {
    /* 128 <= max < 0x800: decode as a two byte sequence. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    else
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  JUMPHERE(skip);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800 && !update_str_ptr) return;

  if (check_invalid)
    {
    /* Only the surrogate range (0xd800-0xdfff) needs the helper. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    skip = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);

    add_jump(compiler, (options & READ_CHAR_UTF8_NEWLINE) ?
      &common->utfreadnewline_invalid : &common->utfreadchar_invalid,
      JUMP(SLJIT_FAST_CALL));

    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    JUMPHERE(skip);
    return;
    }

  if (max >= 0x10000)
    {
    /* Full decoding of surrogate pairs. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    skip = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
    /* TMP2 contains the high surrogate. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(skip);
    return;
    }

  /* Skip low surrogate if necessary. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || HAS_VIRTUAL_REGISTERS)
    {
    /* Branching variant. */
    skip = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    if (max >= 0xd800)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
    JUMPHERE(skip);
    }
  else
    {
    /* Branch free variant using conditional moves. */
    if (update_str_ptr)
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
    if (update_str_ptr)
      CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
    if (max >= 0xd800)
      CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000);
    }
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  /* Invalid: values >= 0x110000 and the surrogate range 0xd800-0xdfff. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (backtracks == NULL)
    {
    /* Nowhere to jump: replace the value with INVALID_UTF_CHAR. */
    OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
    CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    }
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
4091 
4092 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4093 
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Returns TRUE when the character codes below 128 are enough to determine
a match: bytes 16..31 of the 32 byte bitmap must all be 0xff when nclass is
set, or all zero otherwise. */
const sljit_u8 expected = nclass ? 0xff : 0;
int i;

for (i = 16; i < 32; i++)
  {
  if (bitset[i] != expected)
    return FALSE;
  }
return TRUE;
}
4110 
static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the precise character type of a character into TMP1, if the character
is less than 128. Otherwise it returns with zero. Does not check STR_END.

Only used in UTF mode (asserted). When "negated" is FALSE just the first
byte is consumed. When "negated" is TRUE the remaining bytes of a multi-byte
character are skipped as well; in invalid UTF mode the character is validated
by utfreadchar_invalid and a malformed one jumps to "backtracks". */
DEFINE_COMPILER;
struct sljit_jump *jump;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* All values > 127 are zero in ctypes. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (negated)
  {
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (common->invalid_utf)
    {
    /* utfreadchar_invalid expects the first byte in TMP1, but TMP1 holds
    the ctypes value at this point: pass the byte kept in TMP2. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    /* A valid character >= 128 has no type bits. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    }
  else
    {
    /* Skip the remaining bytes using the length table. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  JUMPHERE(jump);
  }
}
4145 
4146 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4147 
static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END.

The type is looked up in common->ctypes, which only covers character values
up to 255; larger characters get type zero. When "negated" is FALSE, UTF-8
input is only decoded as far as a two byte sequence. When "negated" is TRUE
the whole character is consumed. In invalid UTF mode malformed characters
jump to "backtracks". */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(negated);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The result of this read may be unused, but saves an "else" part. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (!negated)
    {
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

    /* TMP2: first byte, TMP1: second byte. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
    /* Only 0xc2-0xdf is accepted as the first byte. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));

    /* ((first - 0xc2) << 6) + second is the character value when the
    second byte is in the 0x80-0xbf range. */
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
    /* Validate the second byte (TMP1), not the combined value (TMP2):
    the latter is >= 0x80 for every valid sequence. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else if (common->invalid_utf)
    {
    /* utfreadchar_invalid expects the first byte in TMP1, but TMP1 holds
    the ctypes value at this point: pass the byte kept in TMP2. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && negated)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && negated)
  {
  /* Skip low surrogate if necessary. */
  if (!common->invalid_utf)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);

    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
      {
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
      CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
      }
    else
      {
      jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      JUMPHERE(jump);
      }
    return;
    }

  /* Invalid UTF mode: a high surrogate must be followed by a low surrogate
  inside the subject, and a low surrogate cannot start a character. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
4266 
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
{
/* Emits code which steps STR_PTR back by one character. STR_PTR and TMP1 are
affected. TMP2 is left alone when must_be_valid is TRUE; otherwise it must
hold the start of the subject buffer on entry and it is destroyed. STR_PTR
is not modified for invalid character sequences. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *single_unit;
#endif

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *retry;

if (common->utf)
  {
  if (common->invalid_utf && !must_be_valid)
    {
    /* Step over one byte; anything >= 0x80 is handled by the helper,
    which signals failure with zero in TMP1. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(single_unit);
    }
  else
    {
    /* Valid input: step back while the byte is a continuation byte
    (10xxxxxx). */
    retry = LABEL();
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, retry);
    }
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (common->invalid_utf && !must_be_valid)
    {
    /* Only surrogates (0xd800-0xdfff) need the helper. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    single_unit = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(single_unit);
    }
  else
    {
    /* Skip low surrogate if necessary: TMP1 becomes 1 for a low
    surrogate, which is then scaled to a code unit size. */
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
    }
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && !must_be_valid)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  if (backtracks != NULL)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    {
    /* No backtrack list: step back by one unit only when the value is
    below 0x110000 (TMP1 is 1 in that case, 0 otherwise). */
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
    }
  return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(must_be_valid);

/* Fixed width characters: one code unit back. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4353 
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Emits a newline test for the character in TMP1. A jump is added to
"backtracks" which is taken when the character is a newline (jumpifmatch is
TRUE) or when it is not a newline (jumpifmatch is FALSE). TMP2 may be
destroyed. */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The anynewline helper reports its result in the zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  /* A single, fixed newline character. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: either CR or NL. */
if (jumpifmatch)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  return;
  }

/* Jump to backtracks only if the character is neither CR nor NL. */
is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
JUMPHERE(is_cr);
}
4386 
4387 #ifdef SUPPORT_UNICODE
4388 
4389 #if PCRE2_CODE_UNIT_WIDTH == 8
static void do_utfreadchar(compiler_common *common)
{
/* Helper routine (entered by fast call) for decoding a UTF-8 character
without validation. On entry TMP1 holds the first byte of the character
(>= 0xc0) and STR_PTR points to the byte after it. On return TMP1 holds the
character value and STR_PTR is moved past the character. TMP2 is used as
scratch. */
DEFINE_COMPILER;
struct sljit_jump *longer_than_two;
struct sljit_jump *longer_than_three;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Append the low six bits of the second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Bit 0x20 of the first byte (now at 0x800) is zero for a two byte
sequence. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
longer_than_two = JUMP(SLJIT_NOT_ZERO);

/* Two byte sequence: remove the shifted 0xc0 marker. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(longer_than_two);
/* Append the low six bits of the third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Bit 0x10 of the first byte (now at 0x10000) is zero for a three byte
sequence. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
longer_than_three = JUMP(SLJIT_NOT_ZERO);

/* Three byte sequence: remove the shifted 0xe0 marker. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four byte sequence: remove the shifted 0xf0 marker, then append the low
six bits of the fourth byte. */
JUMPHERE(longer_than_three);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4434 
static void do_utfreadtype8(compiler_common *common)
{
/* Helper routine (entered by fast call) for reading the character type of
a UTF-8 character without validation. On entry TMP2 holds the first byte of
the character (>= 0xc0) and STR_PTR points to the byte after it. On return
TMP1 holds the type from common->ctypes, or zero for characters above 255,
and STR_PTR is moved past the character. */
DEFINE_COMPILER;
struct sljit_jump *longer_than_two;
struct sljit_jump *above_255;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Bit 0x20 of the first byte is zero only for two byte sequences. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
longer_than_two = JUMP(SLJIT_NOT_ZERO);

/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point: a value above 3 means that the
character is above 255. */
above_255 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(above_255);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* We only have types for characters less than 256: for longer sequences
just skip the remaining bytes using the length table. */
JUMPHERE(longer_than_two);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4470 
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Helper routine (entered by fast call) for decoding a UTF-8 character with
full validation. On entry TMP1 holds the first byte of the character
(>= 0xc0) and STR_PTR points to the byte after it. On return TMP1 holds the
character value, or INVALID_UTF_CHAR for a malformed sequence. STR_PTR is
undefined for invalid characters. TMP2 is used as scratch.

When conditional moves are available, several checks replace the value in
TMP1 instead of branching; the corresponding "invalid" slots stay NULL. */
DEFINE_COMPILER;
sljit_s32 i;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *longer;
struct sljit_jump *near_end;
struct sljit_label *three_byte_tail;
struct sljit_label *invalid_exit;
struct sljit_jump *invalid[11];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);

/* Usually more than 3 characters remained in the subject buffer. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));

/* Not a valid start of a multi-byte sequence, no more bytes read. */
invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);

near_end = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
longer = JUMP(SLJIT_NOT_ZERO);

/* Two-byte sequence: undo the speculative pointer advance. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(longer);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (!has_cmov)
  invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000);
  invalid[2] = NULL;
  }

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
longer = JUMP(SLJIT_NOT_ZERO);

/* Also entered from the "close to the buffer end" path below. */
three_byte_tail = LABEL();

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
if (!has_cmov)
  invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800);
  invalid[3] = NULL;
  }
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (!has_cmov)
  invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  invalid[4] = NULL;
  }
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(longer);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (!has_cmov)
  invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0);
  invalid[5] = NULL;
  }

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
if (!has_cmov)
  invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  invalid[6] = NULL;
  }

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than three code units remain after the first byte: every read
below is preceded by an explicit STR_END check. */
JUMPHERE(near_end);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
longer = JUMP(SLJIT_NOT_ZERO);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(longer);
invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (!has_cmov)
  invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  invalid[10] = NULL;
  }

/* One will be subtracted from STR_PTR later. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Four byte sequences are not possible. */
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_tail);

/* Common exit for every failed check. */
invalid_exit = LABEL();
for (i = 0; i < 11; i++)
  sljit_set_label(invalid[i], invalid_exit);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4629 
do_utfreadnewline_invalid(compiler_common * common)4630 static void do_utfreadnewline_invalid(compiler_common *common)
4631 {
4632 /* Slow decoding a UTF-8 character, specialized for newlines.
4633 TMP1 contains the first byte of the character (>= 0xc0). Return
4634 char value in TMP1. */
     /* The emitted routine is entered as a fast call and returns one of
     three kinds of value in TMP1: 0x85 (bytes 0xc2 0x85), 0x2000 + (third
     byte - 0x80) for 0xe2 0x80 followed by a continuation byte, or
     INVALID_UTF_CHAR for everything else. In the last case any following
     continuation bytes (0x80-0xbf) are skipped, and no byte at or beyond
     STR_END is read. */
4635 DEFINE_COMPILER;
4636 struct sljit_label *loop;
4637 struct sljit_label *skip_start;
4638 struct sljit_label *three_byte_exit;
4639 struct sljit_jump *jump[5];
4640
4641 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4642
4643 if (common->nltype != NLTYPE_ANY)
4644   {
4645   SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);
4646
4647   /* All newlines are ascii, just skip intermediate octets. */
4648   jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4649   loop = LABEL();
       /* The first call (with SLJIT_MEM_SUPP) only asks whether a load with
       post-update of STR_PTR can be emitted; if so the second call emits
       it, otherwise a separate load and add are generated. */
4650   if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
4651     sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4652   else
4653     {
4654     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4655     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4656     }
4657
       /* Keep looping while the byte just read is 10xxxxxx. Note that the
       loop itself does not re-check STR_PTR against STR_END. */
4658   OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
4659   CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
       /* Step back over the non-continuation byte that ended the loop. */
4660   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4661
4662   JUMPHERE(jump[0]);
4663
4664   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4665   OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4666   return;
4667   }
4668
     /* NLTYPE_ANY: fetch the second byte into TMP2 (if there is one) and
     dispatch on the lead byte in TMP1. */
4669 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4670 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4671 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4672
4673 jump[1] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
4674 jump[2] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);
4675
     /* Generic "not a newline" path. It expects the most recently read byte
     in TMP2 and STR_PTR already advanced past it. */
4676 skip_start = LABEL();
4677 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
4678 jump[3] = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);
4679
4680 /* Skip intermediate octets. */
4681 loop = LABEL();
4682 jump[4] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4683 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4684 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4685 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
4686 CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
4687
     /* The last byte read was not a continuation byte: un-read it. */
4688 JUMPHERE(jump[3]);
4689 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4690
     /* Common exit for every path that ran out of input. */
4691 three_byte_exit = LABEL();
4692 JUMPHERE(jump[0]);
4693 JUMPHERE(jump[4]);
4694
4695 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4696 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4697
4698 /* Two byte long newline: 0x85. */
4699 JUMPHERE(jump[1]);
4700 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);
4701
4702 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
4703 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4704
4705 /* Three byte long newlines: 0x2028 and 0x2029. */
4706 JUMPHERE(jump[2]);
4707 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start);
4708 CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit);
4709
4710 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4711 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4712
     /* TMP1 = third byte - 0x80; a value of 0x40 or more means the third
     byte is not a continuation byte, so resume the generic skip with that
     byte still in TMP2. */
4713 OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
4714 CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);
4715
     /* Result is 0x2000 | (third byte - 0x80), i.e. 0x2000-0x203f. */
4716 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
4717 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4718 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4719 }
4720 
do_utfmoveback_invalid(compiler_common * common)4721 static void do_utfmoveback_invalid(compiler_common *common)
4722 {
4723 /* Goes one character back. */
     /* UTF-8 variant that validates while stepping back. Writing P for the
     value of STR_PTR on entry: every successful exit returns TMP1 == 1 with
     STR_PTR on the accepted lead byte (P-1, P-2 or P-3); every failing exit
     returns TMP1 == 0 with STR_PTR == P+1.
     NOTE(review): on entry TMP1 appears to hold the code unit at P and TMP2
     a lower bound for STR_PTR (presumably the subject start) - the callers
     are outside this block, so confirm there. */
4724 DEFINE_COMPILER;
4725 sljit_s32 i;
4726 struct sljit_jump *jump;
4727 struct sljit_jump *buffer_start_close;
4728 struct sljit_label *exit_ok_label;
4729 struct sljit_label *exit_invalid_label;
4730 struct sljit_jump *exit_invalid[7];
4731
4732 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4733
     /* STR_PTR = P-3, so offsets 2, 1 and 0 below address P-1, P-2, P-3. */
4734 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4735 exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
4736
4737 /* Two-byte sequence. */
     /* Fewer than three units lie between TMP2 and P: use the bounds-checked
     copy of the logic further below. */
4738 buffer_start_close = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4739
4740 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
4741
     /* After subtracting 0xc0, values below 0x20 are lead bytes 0xc0-0xdf. */
4742 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4743 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);
4744
4745 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
4746 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4747 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4748
4749 /* Three-byte sequence. */
4750 JUMPHERE(jump);
     /* Unsigned compare: only bytes 0x80-0xbf (continuation bytes) yield a
     value >= -0x40 here; anything else is rejected. */
4751 exit_invalid[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
4752
4753 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4754
4755 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
4756 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);
4757
4758 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
4759 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4760 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4761
4762 /* Four-byte sequence. */
4763 JUMPHERE(jump);
     /* Rebase to (byte - 0x80): the byte at P-2 must be a continuation. */
4764 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
4765 exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);
4766
     /* The byte at P-3 must be a lead byte in 0xf0-0xf4. */
4767 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4768 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
4769 exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);
4770
     /* Shared success exit: STR_PTR is left wherever the jumping code put it. */
4771 exit_ok_label = LABEL();
4772 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
4773 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4774
4775 /* Two-byte sequence. */
     /* Near-start variant: STR_PTR is moved to P-1 and checked against TMP2
     before each load. */
4776 JUMPHERE(buffer_start_close);
4777 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4778
4779 exit_invalid[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4780
4781 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4782
4783 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4784 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, exit_ok_label);
4785
4786 /* Three-byte sequence. */
4787 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4788 exit_invalid[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
4789 exit_invalid[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4790
4791 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4792
4793 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
4794 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, exit_ok_label);
4795
4796 /* Four-byte sequences are not possible. */
4797
     /* Failure exit used while STR_PTR == P-2: adding 3 gives P+1. */
4798 exit_invalid_label = LABEL();
4799 sljit_set_label(exit_invalid[5], exit_invalid_label);
4800 sljit_set_label(exit_invalid[6], exit_invalid_label);
4801 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4802 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4803 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4804
4805 JUMPHERE(exit_invalid[4]);
4806 /* -2 + 4 = 2 */
     /* STR_PTR is P-1 here; bring it to P-3 so the shared exit below (which
     adds 4) also ends at P+1. */
4807 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4808
     /* Failure exit used while STR_PTR == P-3. */
4809 exit_invalid_label = LABEL();
4810 for (i = 0; i < 4; i++)
4811   sljit_set_label(exit_invalid[i], exit_invalid_label);
4812 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4813 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
4814 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4815 }
4816 
do_utfpeakcharback(compiler_common * common)4817 static void do_utfpeakcharback(compiler_common *common)
4818 {
4819 /* Peek a character back. Does not modify STR_PTR. */
     /* Decodes the multi-byte UTF-8 character that ends just before STR_PTR
     and returns its value in TMP1; TMP2 is used as scratch. No bounds or
     validity checks are emitted here. The three decoding tails fall through
     into each other, so each one shifts the accumulated value left by 6 and
     adds the next byte's low bits. */
4820 DEFINE_COMPILER;
4821 struct sljit_jump *jump[2];
4822
4823 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4824
     /* Byte at -2 is a two-byte lead (0xc0-0xdf)? TMP1 keeps lead - 0xc0. */
4825 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4826 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4827 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);
4828
     /* Byte at -3 is a three-byte lead (0xe0-0xef)? TMP1 keeps lead - 0xe0. */
4829 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
4830 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
4831 jump[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);
4832
     /* Otherwise treat it as four bytes: combine (byte[-4] - 0xf0) << 6 with
     (byte[-3] - 0x80). */
4833 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
4834 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
4835 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
4836 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4837 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4838
     /* Append the low six bits of byte[-2]. */
4839 JUMPHERE(jump[1]);
4840 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4841 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4842 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4843 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4844
     /* Append the low six bits of byte[-1]. */
4845 JUMPHERE(jump[0]);
4846 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4847 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4848 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4849 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4850
4851 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4852 }
4853 
do_utfpeakcharback_invalid(compiler_common * common)4854 static void do_utfpeakcharback_invalid(compiler_common *common)
4855 {
4856 /* Peek a character back. Does not modify STR_PTR. */
     /* Validating UTF-8 variant: returns the decoded character in TMP1, or
     INVALID_UTF_CHAR when the bytes before STR_PTR do not form an acceptable
     sequence. The decoder is emitted three times: once with no bounds checks
     and twice for the cases where fewer than four / fewer than three units
     lie between TMP2 and STR_PTR; the later copies jump back into the first
     one through two_byte_entry and three_byte_entry. When the CPU has a
     conditional move, some checks replace TMP1 in place instead of branching
     and the matching exit_invalid[] slot is left NULL.
     NOTE(review): on entry TMP1 appears to hold the code unit just before
     STR_PTR and TMP2 a lower bound for reads (presumably the subject start)
     - confirm against the callers, which are outside this block. */
4857 DEFINE_COMPILER;
4858 sljit_s32 i;
4859 sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
4860 struct sljit_jump *jump[2];
4861 struct sljit_label *two_byte_entry;
4862 struct sljit_label *three_byte_entry;
4863 struct sljit_label *exit_invalid_label;
4864 struct sljit_jump *exit_invalid[8];
4865
4866 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4867
     /* jump[0] is taken when TMP2 + 3 >= STR_PTR, i.e. a four-byte look-back
     would reach below TMP2. */
4868 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
4869 exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
4870 jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
4871
4872 /* Two-byte sequence. */
     /* Accept lead bytes 0xc2-0xdf only (0xc0 and 0xc1 fall through and are
     rejected later as non-continuation bytes). */
4873 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4874 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
4875 jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);
4876
4877 two_byte_entry = LABEL();
4878 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4879 /* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
4880 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4881 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4882
4883 JUMPHERE(jump[1]);
     /* Rebase TMP2 to (byte[-2] - 0x80); it must be a continuation byte.
     TMP1 becomes the low six bits of the last byte. */
4884 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
4885 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
4886 exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4887 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4888 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4889
4890 /* Three-byte sequence. */
4891 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
4892 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
4893 jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);
4894
4895 three_byte_entry = LABEL();
4896 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
4897 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4898
     /* Reject 0xd800-0xdfff: after subtracting 0xd800 those values are below
     0x800. With cmov, TMP1 is set to -0xd800 so that the add below yields 0,
     which the next check then turns into INVALID_UTF_CHAR. */
4899 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4900 if (has_cmov)
4901   {
4902   OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
4903   CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800);
4904   exit_invalid[2] = NULL;
4905   }
4906 else
4907   exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4908
     /* Reject values below 0x800 (too small for a three-byte encoding). */
4909 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4910 if (has_cmov)
4911   {
4912   OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
4913   CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4914   exit_invalid[3] = NULL;
4915   }
4916 else
4917   exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4918
4919 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4920
4921 JUMPHERE(jump[1]);
     /* Rebase TMP2 to (byte[-3] - 0x80); it must be a continuation byte. */
4922 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
4923 exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4924 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
4925 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4926
4927 /* Four-byte sequence. */
4928 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
4929 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
4930 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
4931 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
4932 /* ADD is used instead of OR because of the SUB 0x10000 above. */
4933 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4934
     /* TMP1 now holds (value - 0x10000); one unsigned compare against
     0x100000 rejects everything outside 0x10000-0x10ffff. */
4935 if (has_cmov)
4936   {
4937   OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
4938   CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
4939   exit_invalid[5] = NULL;
4940   }
4941 else
4942   exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
4943
4944 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
4945 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4946
     /* Fewer than four units available. TMP2 becomes (original TMP2 + 2);
     the second jump[0] is taken when a three-byte look-back is impossible
     as well. */
4947 JUMPHERE(jump[0]);
4948 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4949 jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
4950
4951 /* Two-byte sequence. */
4952 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4953 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
4954 CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);
4955
4956 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
4957 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
4958 exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4959 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4960 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4961
4962 /* Three-byte sequence. */
4963 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
4964 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
4965 CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);
4966
     /* A four-byte sequence cannot fit here, so anything else is invalid. */
4967 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4968 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4969
     /* At most two units available; exit_invalid[7] covers fewer than two. */
4970 JUMPHERE(jump[0]);
4971 exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);
4972
4973 /* Two-byte sequence. */
4974 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4975 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
4976 CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);
4977
     /* Shared failure exit. Slots left NULL by the cmov variants are passed
     to sljit_set_label() unchanged. */
4978 exit_invalid_label = LABEL();
4979 for (i = 0; i < 8; i++)
4980   sljit_set_label(exit_invalid[i], exit_invalid_label);
4981
4982 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4983 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4984 }
4985 
4986 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
4987 
4988 #if PCRE2_CODE_UNIT_WIDTH == 16
4989 
do_utfreadchar_invalid(compiler_common * common)4990 static void do_utfreadchar_invalid(compiler_common *common)
4991 {
4992 /* Slow decoding a UTF-16 character. TMP1 contains the first half
4993 of the character (>= 0xd800). Return char value in TMP1. STR_PTR is
4994 undefined for invalid characters. */
     /* INVALID_UTF_CHAR is returned when TMP1 is 0xdc00 or above, when
     STR_PTR has reached STR_END, or when the next code unit is outside
     0xdc00-0xdbff+0x400. */
4995 DEFINE_COMPILER;
4996 struct sljit_jump *exit_invalid[3];
4997
4998 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4999
5000 /* TMP2 contains the high surrogate. */
     /* NOTE(review): the arithmetic below (TMP2 << 10, then + 0x10000) only
     gives the right code point if TMP2 already has 0xd800 subtracted -
     confirm against the caller. */
5001 exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
5002 exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5003
     /* Load the second code unit into TMP1 and step over it. */
5004 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5005 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
5006 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5007
     /* The second unit must lie in 0xdc00-0xdfff. */
5008 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
5009 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
5010 exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);
5011
5012 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5013 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5014
5015 JUMPHERE(exit_invalid[0]);
5016 JUMPHERE(exit_invalid[1]);
5017 JUMPHERE(exit_invalid[2]);
5018 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5019 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5020 }
5021 
do_utfreadnewline_invalid(compiler_common * common)5022 static void do_utfreadnewline_invalid(compiler_common *common)
5023 {
5024 /* Slow decoding a UTF-16 character, specialized for newlines.
5025 TMP1 contains the first half of the character (>= 0xd800). Return
5026 char value in TMP1. */
     /* The character is never actually decoded: the routine returns either
     INVALID_UTF_CHAR (input exhausted, or TMP1 >= 0xdc00) or the constant
     0x10000, and in the latter case steps STR_PTR over the following code
     unit only when that unit is in 0xdc00-0xdfff. */
5027
5028 DEFINE_COMPILER;
5029 struct sljit_jump *exit_invalid[2];
5030
5031 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5032
5033 /* TMP2 contains the high surrogate. */
     /* Whatever TMP2 holds on entry is overwritten by the load below without
     being read. */
5034 exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5035
5036 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5037 exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
5038
     /* TMP2 = 1 if the next unit is in 0xdc00-0xdfff, else 0; scaled to a
     byte offset with UCHAR_SHIFT and added to STR_PTR. */
5039 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
5040 OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
5041 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
5042 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
5043 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
5044 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5045
5046 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5047
5048 JUMPHERE(exit_invalid[0]);
5049 JUMPHERE(exit_invalid[1]);
5050 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5051 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5052 }
5053 
do_utfmoveback_invalid(compiler_common * common)5054 static void do_utfmoveback_invalid(compiler_common *common)
5055 {
5056 /* Goes one character back. */
     /* UTF-16 variant. On success STR_PTR is moved one further code unit
     back and TMP1 is set to 1; on failure STR_PTR is moved one code unit
     forward and TMP1 is set to 0.
     NOTE(review): the first check suggests TMP1 arrives as (code unit -
     0xd800) and TMP2 as a lower bound for STR_PTR - confirm against the
     callers, which are outside this block. */
5057 DEFINE_COMPILER;
5058 struct sljit_jump *exit_invalid[3];
5059
5060 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5061
5062 exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
5063 exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5064
     /* The preceding code unit must be in 0xd800-0xdbff. */
5065 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5066 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5067 exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);
5068
5069 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5070 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
5071 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5072
5073 JUMPHERE(exit_invalid[0]);
5074 JUMPHERE(exit_invalid[1]);
5075 JUMPHERE(exit_invalid[2]);
5076
5077 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5078 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
5079 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5080 }
5081 
do_utfpeakcharback_invalid(compiler_common * common)5082 static void do_utfpeakcharback_invalid(compiler_common *common)
5083 {
5084 /* Peek a character back. Does not modify STR_PTR. */
     /* UTF-16 variant. A TMP1 value of 0xe000 or above is returned
     unchanged. A value in 0xdc00-0xdfff is combined with the code unit at
     STR_PTR[-2], which must be in 0xd800-0xdbff. Every other case returns
     INVALID_UTF_CHAR.
     NOTE(review): TMP1 presumably holds the code unit at STR_PTR[-1] and
     TMP2 a lower bound for reads - confirm against the callers. */
5085 DEFINE_COMPILER;
5086 struct sljit_jump *jump;
5087 struct sljit_jump *exit_invalid[3];
5088
5089 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5090
5091 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
     /* After the add, TMP2 >= STR_PTR means fewer than two code units lie
     between the original TMP2 and STR_PTR. */
5092 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5093 exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
5094 exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5095
     /* Result = 0x10000 + ((unit[-2] - 0xd800) << 10) + (TMP1 - 0xdc00). */
5096 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5097 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
5098 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
5099 exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
5100 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
5101 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5102
5103 JUMPHERE(jump);
5104 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5105
5106 JUMPHERE(exit_invalid[0]);
5107 JUMPHERE(exit_invalid[1]);
5108 JUMPHERE(exit_invalid[2]);
5109
5110 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5111 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5112 }
5113 
5114 #endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
5115 
5116 /* UCD_BLOCK_SIZE must be 128 (see the assert below). UCD_BLOCK_MASK is
     the block size minus one and UCD_BLOCK_SHIFT is its base-2 logarithm, so
     both have to be updated together if the block size ever changes. */
5117 #define UCD_BLOCK_MASK 127
5118 #define UCD_BLOCK_SHIFT 7
5119 
do_getucd(compiler_common * common)5120 static void do_getucd(compiler_common *common)
5121 {
5122 /* Search the UCD record for the character comes in TMP1.
5123 Returns the 16-bit value read from ucd_stage2 in TMP2. TMP1 is
     left holding the ucd_stage2 index that was used for the lookup; unlike
     do_getucdtype, no field of the record is loaded here. */
5124 DEFINE_COMPILER;
5125 #if PCRE2_CODE_UNIT_WIDTH == 32
5126 struct sljit_jump *jump;
5127 #endif
5128
5129 #if defined SLJIT_DEBUG && SLJIT_DEBUG
5130 /* dummy_ucd_record */
5131 const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
5132 SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
5133 SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
5134 #endif
5135
5136 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);
5137
5138 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5139
5140 #if PCRE2_CODE_UNIT_WIDTH == 32
     /* Without UTF checking a 32-bit code unit can exceed the table range;
     such values are replaced by UNASSIGNED_UTF_CHAR before the lookup. */
5141 if (!common->utf)
5142   {
5143   jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
5144   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
5145   JUMPHERE(jump);
5146   }
5147 #endif
5148
     /* Two-stage lookup: the block number (TMP1 >> UCD_BLOCK_SHIFT) indexes
     the 16-bit ucd_stage1 table; that entry, scaled by the block size, plus
     the offset within the block indexes the 16-bit ucd_stage2 table. */
5149 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5150 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5151 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5152 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5153 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5154 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5155 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5156 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5157 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5158 }
5159 
do_getucdtype(compiler_common * common)5160 static void do_getucdtype(compiler_common *common)
5161 {
5162 /* Search the UCD record for the character comes in TMP1.
5163 Returns chartype in TMP1 and UCD offset in TMP2. */
     /* The value left in TMP2 is the ucd_stage2 entry multiplied by four
     (see the shift near the end), not a full byte offset into
     ucd_records. */
5164 DEFINE_COMPILER;
5165 #if PCRE2_CODE_UNIT_WIDTH == 32
5166 struct sljit_jump *jump;
5167 #endif
5168
5169 #if defined SLJIT_DEBUG && SLJIT_DEBUG
5170 /* dummy_ucd_record */
5171 const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
5172 SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
5173 SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
5174 #endif
5175
5176 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);
5177
5178 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5179
5180 #if PCRE2_CODE_UNIT_WIDTH == 32
     /* Without UTF checking a 32-bit code unit can exceed the table range;
     such values are replaced by UNASSIGNED_UTF_CHAR before the lookup. */
5181 if (!common->utf)
5182   {
5183   jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
5184   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
5185   JUMPHERE(jump);
5186   }
5187 #endif
5188
     /* Two-stage lookup: ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT] selects a
     block, and ucd_stage2[block * 128 + (TMP1 & UCD_BLOCK_MASK)] gives the
     record index, which ends up in TMP2. */
5189 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5190 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5191 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5192 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5193 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5194 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5195 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5196 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5197
5198 /* TMP2 is multiplied by 12. Same as (TMP2 << 2) + ((TMP2 << 2) << 1). */
     /* The 12 is sizeof(ucd_record), asserted above. The final load reads
     the chartype byte at &ucd_records[index].chartype. */
5199 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5200 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
5201 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5202 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);
5203
5204 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5205 }
5206 
5207 #endif /* SUPPORT_UNICODE */
5208 
mainloop_entry(compiler_common * common)5209 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
5210 {
     /* Emits the head of the match loop and returns the label that advances
     STR_PTR to the next start position. Optional set-up code is generated
     first: for PCRE2_FIRSTLINE the end of the first line is located, and
     for PCRE2_USE_OFFSET_LIMIT the limit is validated; either result is
     stored in the stack slot common->match_end_ptr. The very first pass
     jumps over the advancing code (see `start`), so control initially lands
     just after it without moving STR_PTR. */
5211 DEFINE_COMPILER;
5212 struct sljit_label *mainloop;
5213 struct sljit_label *newlinelabel = NULL;
5214 struct sljit_jump *start;
5215 struct sljit_jump *end = NULL;
5216 struct sljit_jump *end2 = NULL;
5217 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5218 struct sljit_label *loop;
5219 struct sljit_jump *jump;
5220 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5221 jump_list *newline = NULL;
5222 sljit_u32 overall_options = common->re->overall_options;
5223 BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
5224 BOOL newlinecheck = FALSE;
5225 BOOL readuchar = FALSE;
5226
     /* The two-unit newline skip is only generated when neither HASCRORLF
     nor FIRSTLINE is set and the newline convention can be two units long. */
5227 if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
5228     && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
5229   newlinecheck = TRUE;
5230
5231 SLJIT_ASSERT(common->abort_label == NULL);
5232
5233 if ((overall_options & PCRE2_FIRSTLINE) != 0)
5234   {
5235   /* Search for the end of the first line. */
5236   SLJIT_ASSERT(common->match_end_ptr != 0);
       /* STR_PTR is saved in TMP3 and restored once the scan is done. */
5237   OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5238
5239   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5240     {
         /* Fixed two-unit newline: compare each adjacent pair of code units
         with the high and low byte of common->newline. The stored end is
         one unit before the final STR_PTR. */
5241     mainloop = LABEL();
5242     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5243     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5244     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5245     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5246     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
5247     CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
5248     JUMPHERE(end);
5249     OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5250     }
5251   else
5252     {
5253     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5254     mainloop = LABEL();
5255     /* Continual stores do not cause a data dependency. */
         /* The position before each character is stored, so when a newline
         is detected the slot already holds the start of that newline. */
5256     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5257     read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
5258     check_newlinechar(common, common->nltype, &newline, TRUE);
5259     CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
5260     JUMPHERE(end);
5261     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5262     set_jumps(newline, LABEL());
5263     }
5264
5265   OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5266   }
5267 else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
5268   {
5269   /* Check whether offset limit is set and valid. */
5270   SLJIT_ASSERT(common->match_end_ptr != 0);
5271
5272   if (HAS_VIRTUAL_REGISTERS)
5273     {
5274     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5275     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5276     }
5277   else
5278     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5279
       /* If offset_limit is PCRE2_UNSET, STR_END (already in TMP2) is stored
       as the limit and the rest of the check is skipped. */
5280   OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5281   end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
5282   if (HAS_VIRTUAL_REGISTERS)
5283     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5284   else
5285     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
5286
5287 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
       /* Convert the limit from code units to a byte offset. */
5288   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5289 #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5290   if (HAS_VIRTUAL_REGISTERS)
5291     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5292
       /* TMP2 = begin + limit, clamped to STR_END. If that is below the
       current STR_PTR the match is aborted with PCRE2_ERROR_NOMATCH, which
       is loaded into the return register before the conditional jump. */
5293   OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
5294   end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5295   OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5296   JUMPHERE(end2);
5297   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
5298   add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
5299   JUMPHERE(end);
5300   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
5301   }
5302
     /* First entry: skip all of the advancing code below. */
5303 start = JUMP(SLJIT_JUMP);
5304
5305 if (newlinecheck)
5306   {
       /* Reached from the main loop when the current unit equals the first
       unit of the newline: step over it, and over the following unit too if
       that one equals the second unit of the newline. */
5307   newlinelabel = LABEL();
5308   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5309   end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5310   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5311   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
5312   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
5313 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5314   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5315 #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5316   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5317   end2 = JUMP(SLJIT_JUMP);
5318   }
5319
     /* This is the label handed back to the caller. */
5320 mainloop = LABEL();
5321
5322 /* Increasing the STR_PTR here requires one less jump in the most common case. */
5323 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5324 if (common->utf && !common->invalid_utf) readuchar = TRUE;
5325 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5326 if (newlinecheck) readuchar = TRUE;
5327
     /* The current unit is only loaded when something below inspects it. */
5328 if (readuchar)
5329   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5330
5331 if (newlinecheck)
5332   CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
5333
5334 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5335 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5336 #if PCRE2_CODE_UNIT_WIDTH == 8
5337 if (common->invalid_utf)
5338   {
5339   /* Skip continuation code units. */
       /* Bytes in 0x80-0xbf are stepped over until another byte or STR_END
       is reached; the byte that stopped the loop is then un-read. */
5340   loop = LABEL();
5341   jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5342   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5343   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5344   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5345   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
5346   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5347   JUMPHERE(jump);
5348   }
5349 else if (common->utf)
5350   {
       /* For lead bytes >= 0xc0, advance further by the value found in
       utf8_table4 at index (lead byte - 0xc0). */
5351   jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5352   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5353   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5354   JUMPHERE(jump);
5355   }
5356 #elif PCRE2_CODE_UNIT_WIDTH == 16
5357 if (common->invalid_utf)
5358   {
5359   /* Skip continuation code units. */
       /* Same scheme as the 8-bit case, with 0xdc00-0xdfff as the units to
       step over. */
5360   loop = LABEL();
5361   jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5362   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5363   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5364   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
5365   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
5366   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5367   JUMPHERE(jump);
5368   }
5369 else if (common->utf)
5370   {
       /* If the unit just passed was in 0xd800-0xdbff, advance one more
       unit; done with a conditional move when available, otherwise by
       adding a 0/1 flag scaled to bytes. */
5371   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5372
5373   if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
5374     {
5375     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5376     OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
5377     CMOV(SLJIT_LESS, STR_PTR, TMP2, 0);
5378     }
5379   else
5380     {
5381     OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
5382     OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
5383     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5384     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5385     }
5386   }
5387 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
5388 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
     /* Landing point of the initial jump and of both newline-skip exits. */
5389 JUMPHERE(start);
5390
5391 if (newlinecheck)
5392   {
5393   JUMPHERE(end);
5394   JUMPHERE(end2);
5395   }
5396
5397 return mainloop;
5398 }
5399 
5400 
add_prefix_char(PCRE2_UCHAR chr,fast_forward_char_data * chars,BOOL last)5401 static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
5402 {
5403 sljit_u32 i, count = chars->count;
5404 
5405 if (count == 255)
5406   return;
5407 
5408 if (count == 0)
5409   {
5410   chars->count = 1;
5411   chars->chars[0] = chr;
5412 
5413   if (last)
5414     chars->last_count = 1;
5415   return;
5416   }
5417 
5418 for (i = 0; i < count; i++)
5419   if (chars->chars[i] == chr)
5420     return;
5421 
5422 if (count >= MAX_DIFF_CHARS)
5423   {
5424   chars->count = 255;
5425   return;
5426   }
5427 
5428 chars->chars[count] = chr;
5429 chars->count = count + 1;
5430 
5431 if (last)
5432   chars->last_count++;
5433 }
5434 
/* Recursive function, which scans prefix literals.

Arguments:
  common     the compiler state (utf, ucp, fcc and ctypes are read)
  cc         the first opcode to be scanned
  chars      array of per-position data; entry N describes the code units
             which may appear N positions after the starting position
  max_chars  the maximum number of positions to describe
  rec_count  shared work budget; it is decreased by one for each opcode
             processed by any nesting level

Returns: the number of positions described by this call, or 0 when the
  budget in rec_count is exhausted.

Whenever a part of the pattern is optional or has alternatives, the other
path is scanned by a recursive call which writes into the same chars array,
and the value returned by that call becomes the new max_chars limit. */

static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
{
BOOL last, any, class, caseless;
int len, repeat, len_save, consumed = 0;
sljit_u32 chr; /* Any unicode character. */
sljit_u8 *bytes, *bytes_end, byte;
PCRE2_SPTR alternative, cc_save, oc;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
PCRE2_UCHAR othercase[4];
#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
PCRE2_UCHAR othercase[2];
#else
PCRE2_UCHAR othercase[1];
#endif

repeat = 1;
while (TRUE)
  {
  if (*rec_count == 0)
    return 0;
  (*rec_count)--;

  /* last: stop scanning after the current item.
     any: the item leaves its position(s) untracked (count 255).
     class: the item is a 256 bit class bitmap. */
  last = TRUE;
  any = FALSE;
  class = FALSE;
  caseless = FALSE;

  switch (*cc)
    {
    case OP_CHARI:
    caseless = TRUE;
    /* Fall through */
    case OP_CHAR:
    last = FALSE;
    cc++;
    break;

    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    /* Zero width assertions. */
    cc++;
    continue;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    /* The whole assertion is skipped. */
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    caseless = TRUE;
    /* Fall through */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    /* One occurrence is recorded, then the scan stops (last is TRUE). */
    cc++;
    break;

    case OP_EXACTI:
    caseless = TRUE;
    /* Fall through */
    case OP_EXACT:
    repeat = GET2(cc, 1);
    last = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    caseless = TRUE;
    /* Fall through */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    len = 1;
    cc++;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
    /* The character is optional: first scan the path which skips it. */
    max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
    if (max_chars == 0)
      return consumed;
    last = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* Every alternative except the first is scanned recursively, then the
    loop continues with the first alternative. */
    alternative = cc + GET(cc, 1);
    while (*alternative == OP_ALT)
      {
      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      alternative += GET(alternative, 1);
      }

    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
      return consumed;
#endif
    class = TRUE;
    break;

    case OP_NCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    class = TRUE;
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UNICODE
    case OP_NOTPROP:
    case OP_PROP:
#if PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* The repeat count applies to the opcode processed by the next
    iteration. */
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    return consumed;
    }

  if (any)
    {
    /* Mark 'repeat' positions as untracked. */
    do
      {
      chars->count = 255;

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    repeat = 1;
    continue;
    }

  if (class)
    {
    /* The 32 byte bitmap follows the opcode, cc is moved to the item after
    the bitmap, which may be a repeat opcode. */
    bytes = (sljit_u8*) (cc + 1);
    cc += 1 + 32 / sizeof(PCRE2_UCHAR);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      /* The class is optional: first scan the path which skips it. */
      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      break;

      default:
      case OP_CRPLUS:
      case OP_CRMINPLUS:
      case OP_CRPOSPLUS:
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      repeat = GET2(cc, 1);
      if (repeat <= 0)
        return consumed;
      break;
      }

    do
      {
      /* When the bit of character 255 is set the position is not tracked. */
      if (bytes[31] & 0x80)
        chars->count = 255;
      else if (chars->count != 255)
        {
        bytes_end = bytes + 32;
        chr = 0;
        do
          {
          byte = *bytes++;
          SLJIT_ASSERT((chr & 0x7) == 0);
          if (byte == 0)
            chr += 8;
          else
            {
            do
              {
              if ((byte & 0x1) != 0)
                add_prefix_char(chr, chars, TRUE);
              byte >>= 1;
              chr++;
              }
            while (byte != 0);
            /* Round up to the first character of the next bitmap byte. */
            chr = (chr + 7) & ~7;
            }
          }
        while (chars->count != 255 && bytes < bytes_end);
        /* Rewind to the start of the bitmap for the next repetition. */
        bytes = bytes_end - 32;
        }

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      return consumed;

      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      cc++;
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* Only a fixed repeat count (minimum equals maximum) can be followed
      by further prefix positions. */
      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
        return consumed;
      cc += 1 + 2 * IMM2_SIZE;
      break;
      }

    repeat = 1;
    continue;
    }

  /* A literal character: cc points to its first code unit. */
  len = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif

  if (caseless && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UNICODE
    if (common->utf)
      {
      GETCHAR(chr, cc);
      /* The other case is only usable when it is encoded in the same number
      of code units. */
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
        return consumed;
      }
    else
#endif
      {
      chr = *cc;
#ifdef SUPPORT_UNICODE
      if (common->ucp && chr > 127)
        othercase[0] = UCD_OTHERCASE(chr);
      else
#endif
        othercase[0] = TABLE_GET(chr, common->fcc, chr);
      }
    }
  else
    {
    caseless = FALSE;
    othercase[0] = 0; /* Stops compiler warning - PH */
    }

  /* Record each code unit of the character, 'repeat' times. */
  len_save = len;
  cc_save = cc;
  while (TRUE)
    {
    oc = othercase;
    do
      {
      len--;
      consumed++;

      add_prefix_char(*cc, chars, len == 0);

      if (caseless)
        add_prefix_char(*oc, chars, len == 0);

      if (--max_chars == 0)
        return consumed;
      chars++;
      cc++;
      oc++;
      }
    while (len > 0);

    if (--repeat == 0)
      break;

    len = len_save;
    cc = cc_save;
    }

  repeat = 1;
  if (last)
    return consumed;
  }
}
5843 
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
/* Emits a conditional jump to label which is taken when the code unit held in
reg cannot be the first code unit of a character.

In 8 bit mode the test is (reg & 0xc0) == 0x80, which selects the UTF-8
continuation bytes. In 16 bit mode it is (reg & 0xfc00) == 0xdc00, which
selects the UTF-16 low (trailing) surrogates.

The content of reg is destroyed: it is replaced by the masked value. */

static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0x80, label);
#elif PCRE2_CODE_UNIT_WIDTH == 16
OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label);
#else
#error "Unknown code width"
#endif
}
#endif
5858 
5859 #include "pcre2_jit_simd_inc.h"
5860 
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD

/* Selects two prefix positions for the SIMD character pair search and emits
the search when a suitable pair exists.

Arguments:
  common  the compiler state
  chars   the prefix positions; last_count must already hold the priority
          of the position and chars[0], chars[1] its one or two values
  max     the number of valid entries in chars

Returns: TRUE when the search code was emitted, FALSE otherwise.

A pair (i, j) is a candidate when j < i, the distance between the positions is
at most max_fast_forward_char_pair_offset(), the priority of both positions is
greater than 2, and the two positions have no value in common. The candidate
with the highest priority sum is used; since the comparison is >=, a candidate
found later replaces an earlier one with the same sum. */

static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
{
sljit_s32 i, j, max_i = 0, max_j = 0;
sljit_u32 max_pri = 0;
PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri;

for (i = max - 1; i >= 1; i--)
  {
  if (chars[i].last_count > 2)
    {
    a1 = chars[i].chars[0];
    a2 = chars[i].chars[1];
    a_pri = chars[i].last_count;

    /* The leftmost position which is still within the allowed distance. */
    j = i - max_fast_forward_char_pair_offset();
    if (j < 0)
      j = 0;

    while (j < i)
      {
      b_pri = chars[j].last_count;
      if (b_pri > 2 && a_pri + b_pri >= max_pri)
        {
        b1 = chars[j].chars[0];
        b2 = chars[j].chars[1];

        if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)
          {
          max_pri = a_pri + b_pri;
          max_i = i;
          max_j = j;
          }
        }
      j++;
      }
    }
  }

/* Every accepted pair has a non-zero priority sum. */
if (max_pri == 0)
  return FALSE;

fast_forward_char_pair_simd(common, max_i, chars[max_i].chars[0], chars[max_i].chars[1], max_j, chars[max_j].chars[0], chars[max_j].chars[1]);
return TRUE;
}

#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
5909 
/* Emits code which moves STR_PTR forward to the next position where the code
unit 'offset' positions later is char1 or char2.

Arguments:
  common  the compiler state
  char1   the first accepted code unit
  char2   the second accepted code unit (may be the same as char1)
  offset  the distance of the tested code unit from the match start; it
          must be zero unless the mode is PCRE2_JIT_COMPLETE

When common->match_end_ptr is set, STR_END is temporarily lowered to the value
stored there plus offset + 1 code units (if that is smaller); the original
STR_END is kept in TMP3 and restored before the emitted code ends. TMP1 is
used as a scratch register. */

static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *match;
struct sljit_jump *partial_quit;
PCRE2_UCHAR mask;
BOOL has_match_end = (common->match_end_ptr != 0);

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);

if (has_match_end)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

/* The search is done on the code unit at 'offset'. */
if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (has_match_end)
  {
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  /* STR_END = min(STR_END, match end + offset + 1). */
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD

if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
  {
  fast_forward_char_simd(common, char1, char2, offset);

  if (offset > 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

  if (has_match_end)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

start = LABEL();

/* In complete mode reaching STR_END means the match has failed, otherwise
the loop is simply left (see the JUMPHERE below). */
partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char1 == char2)
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, start);
else
  {
  mask = char1 ^ char2;
  if (is_powerof2(mask))
    {
    /* The two values differ in a single bit: set it, then one comparison
    is enough. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask, start);
    }
  else
    {
    match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, start);
    JUMPHERE(match);
    }
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  /* The match start candidate must be the first code unit of a character. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
  jumpto_if_not_utf_char_start(compiler, TMP1, start);
  }
#endif

/* Undo the offset and the increment done after the read. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(partial_quit);

if (has_match_end)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
5995 
fast_forward_first_n_chars(compiler_common * common)5996 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
5997 {
5998 DEFINE_COMPILER;
5999 struct sljit_label *start;
6000 struct sljit_jump *match;
6001 fast_forward_char_data chars[MAX_N_CHARS];
6002 sljit_s32 offset;
6003 PCRE2_UCHAR mask;
6004 PCRE2_UCHAR *char_set, *char_set_end;
6005 int i, max, from;
6006 int range_right = -1, range_len;
6007 sljit_u8 *update_table = NULL;
6008 BOOL in_range;
6009 sljit_u32 rec_count;
6010 
6011 for (i = 0; i < MAX_N_CHARS; i++)
6012   {
6013   chars[i].count = 0;
6014   chars[i].last_count = 0;
6015   }
6016 
6017 rec_count = 10000;
6018 max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
6019 
6020 if (max < 1)
6021   return FALSE;
6022 
6023 /* Convert last_count to priority. */
6024 for (i = 0; i < max; i++)
6025   {
6026   SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count);
6027 
6028   if (chars[i].count == 1)
6029     {
6030     chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
6031     /* Simplifies algorithms later. */
6032     chars[i].chars[1] = chars[i].chars[0];
6033     }
6034   else if (chars[i].count == 2)
6035     {
6036     SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);
6037 
6038     if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
6039       chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
6040     else
6041       chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
6042     }
6043   else
6044     chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
6045   }
6046 
6047 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6048 if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
6049   return TRUE;
6050 #endif
6051 
6052 in_range = FALSE;
6053 /* Prevent compiler "uninitialized" warning */
6054 from = 0;
6055 range_len = 4 /* minimum length */ - 1;
6056 for (i = 0; i <= max; i++)
6057   {
6058   if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
6059     {
6060     range_len = i - from;
6061     range_right = i - 1;
6062     }
6063 
6064   if (i < max && chars[i].count < 255)
6065     {
6066     SLJIT_ASSERT(chars[i].count > 0);
6067     if (!in_range)
6068       {
6069       in_range = TRUE;
6070       from = i;
6071       }
6072     }
6073   else
6074     in_range = FALSE;
6075   }
6076 
6077 if (range_right >= 0)
6078   {
6079   update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
6080   if (update_table == NULL)
6081     return TRUE;
6082   memset(update_table, IN_UCHARS(range_len), 256);
6083 
6084   for (i = 0; i < range_len; i++)
6085     {
6086     SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);
6087 
6088     char_set = chars[range_right - i].chars;
6089     char_set_end = char_set + chars[range_right - i].count;
6090     do
6091       {
6092       if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
6093         update_table[(*char_set) & 0xff] = IN_UCHARS(i);
6094       char_set++;
6095       }
6096     while (char_set < char_set_end);
6097     }
6098   }
6099 
6100 offset = -1;
6101 /* Scan forward. */
6102 for (i = 0; i < max; i++)
6103   {
6104   if (range_right == i)
6105     continue;
6106 
6107   if (offset == -1)
6108     {
6109     if (chars[i].last_count >= 2)
6110       offset = i;
6111     }
6112   else if (chars[offset].last_count < chars[i].last_count)
6113     offset = i;
6114   }
6115 
6116 SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));
6117 
6118 if (range_right < 0)
6119   {
6120   if (offset < 0)
6121     return FALSE;
6122   /* Works regardless the value is 1 or 2. */
6123   fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
6124   return TRUE;
6125   }
6126 
6127 SLJIT_ASSERT(range_right != offset);
6128 
6129 if (common->match_end_ptr != 0)
6130   {
6131   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6132   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6133   OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6134   add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6135   OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
6136   CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
6137   }
6138 else
6139   {
6140   OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6141   add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6142   }
6143 
6144 SLJIT_ASSERT(range_right >= 0);
6145 
6146 if (!HAS_VIRTUAL_REGISTERS)
6147   OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
6148 
6149 start = LABEL();
6150 add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6151 
6152 #if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
6153 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
6154 #else
6155 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
6156 #endif
6157 
6158 if (!HAS_VIRTUAL_REGISTERS)
6159   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
6160 else
6161   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
6162 
6163 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6164 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
6165 
6166 if (offset >= 0)
6167   {
6168   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
6169   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6170 
6171   if (chars[offset].count == 1)
6172     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
6173   else
6174     {
6175     mask = chars[offset].chars[0] ^ chars[offset].chars[1];
6176     if (is_powerof2(mask))
6177       {
6178       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
6179       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
6180       }
6181     else
6182       {
6183       match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
6184       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
6185       JUMPHERE(match);
6186       }
6187     }
6188   }
6189 
6190 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6191 if (common->utf && offset != 0)
6192   {
6193   if (offset < 0)
6194     {
6195     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6196     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6197     }
6198   else
6199     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6200 
6201   jumpto_if_not_utf_char_start(compiler, TMP1, start);
6202 
6203   if (offset < 0)
6204     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6205   }
6206 #endif
6207 
6208 if (offset >= 0)
6209   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6210 
6211 if (common->match_end_ptr != 0)
6212   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6213 else
6214   OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6215 return TRUE;
6216 }
6217 
fast_forward_first_char(compiler_common * common)6218 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
6219 {
6220 PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
6221 PCRE2_UCHAR oc;
6222 
6223 oc = first_char;
6224 if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
6225   {
6226   oc = TABLE_GET(first_char, common->fcc, first_char);
6227 #if defined SUPPORT_UNICODE
6228   if (first_char > 127 && (common->utf || common->ucp))
6229     oc = UCD_OTHERCASE(first_char);
6230 #endif
6231   }
6232 
6233 fast_forward_first_char2(common, first_char, oc, 0);
6234 }
6235 
fast_forward_newline(compiler_common * common)6236 static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
6237 {
6238 DEFINE_COMPILER;
6239 struct sljit_label *loop;
6240 struct sljit_jump *lastchar = NULL;
6241 struct sljit_jump *firstchar;
6242 struct sljit_jump *quit = NULL;
6243 struct sljit_jump *foundcr = NULL;
6244 struct sljit_jump *notfoundnl;
6245 jump_list *newline = NULL;
6246 
6247 if (common->match_end_ptr != 0)
6248   {
6249   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6250   OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6251   }
6252 
6253 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6254   {
6255 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6256   if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
6257     {
6258     if (HAS_VIRTUAL_REGISTERS)
6259       {
6260       OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6261       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6262       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6263       }
6264     else
6265       {
6266       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6267       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6268       }
6269     firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6270 
6271     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6272     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
6273     OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
6274 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6275     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6276 #endif
6277     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6278 
6279     fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);
6280     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6281     }
6282   else
6283 #endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6284     {
6285     lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6286     if (HAS_VIRTUAL_REGISTERS)
6287       {
6288       OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6289       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6290       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6291       }
6292     else
6293       {
6294       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6295       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6296       }
6297     firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6298 
6299     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
6300     OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
6301     OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
6302 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6303     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
6304 #endif
6305     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6306 
6307     loop = LABEL();
6308     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6309     quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6310     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6311     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6312     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
6313     CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
6314 
6315     JUMPHERE(quit);
6316     JUMPHERE(lastchar);
6317     }
6318 
6319   JUMPHERE(firstchar);
6320 
6321   if (common->match_end_ptr != 0)
6322     OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6323   return;
6324   }
6325 
6326 if (HAS_VIRTUAL_REGISTERS)
6327   {
6328   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6329   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6330   }
6331 else
6332   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6333 
6334 /* Example: match /^/ to \r\n from offset 1. */
6335 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6336 
6337 if (common->nltype == NLTYPE_ANY)
6338   move_back(common, NULL, FALSE);
6339 else
6340   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6341 
6342 loop = LABEL();
6343 common->ff_newline_shortcut = loop;
6344 
6345 #ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
6346 if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
6347   {
6348   if (common->nltype == NLTYPE_ANYCRLF)
6349     {
6350     fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
6351     if (common->mode != PCRE2_JIT_COMPLETE)
6352       lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6353 
6354     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6355     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6356     quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6357     }
6358    else
6359     {
6360     fast_forward_char_simd(common, common->newline, common->newline, 0);
6361 
6362     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6363     if (common->mode != PCRE2_JIT_COMPLETE)
6364       {
6365       OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
6366       CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
6367       }
6368     }
6369   }
6370 else
6371 #endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
6372   {
6373   read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
6374   lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6375   if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6376     foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6377   check_newlinechar(common, common->nltype, &newline, FALSE);
6378   set_jumps(newline, loop);
6379   }
6380 
6381 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6382   {
6383   if (quit == NULL)
6384     {
6385     quit = JUMP(SLJIT_JUMP);
6386     JUMPHERE(foundcr);
6387     }
6388 
6389   notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6390   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6391   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
6392   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
6393 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6394   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6395 #endif
6396   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6397   JUMPHERE(notfoundnl);
6398   JUMPHERE(quit);
6399   }
6400 
6401 if (lastchar)
6402   JUMPHERE(lastchar);
6403 JUMPHERE(firstchar);
6404 
6405 if (common->match_end_ptr != 0)
6406   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6407 }
6408 
6409 static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6410 
fast_forward_start_bits(compiler_common * common)6411 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
6412 {
6413 DEFINE_COMPILER;
6414 const sljit_u8 *start_bits = common->re->start_bitmap;
6415 struct sljit_label *start;
6416 struct sljit_jump *partial_quit;
6417 #if PCRE2_CODE_UNIT_WIDTH != 8
6418 struct sljit_jump *found = NULL;
6419 #endif
6420 jump_list *matches = NULL;
6421 
6422 if (common->match_end_ptr != 0)
6423   {
6424   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6425   OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
6426   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6427   OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
6428   CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
6429   }
6430 
6431 start = LABEL();
6432 
6433 partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6434 if (common->mode == PCRE2_JIT_COMPLETE)
6435   add_jump(compiler, &common->failed_match, partial_quit);
6436 
6437 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6438 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6439 
6440 if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
6441   {
6442 #if PCRE2_CODE_UNIT_WIDTH != 8
6443   if ((start_bits[31] & 0x80) != 0)
6444     found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
6445   else
6446     CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
6447 #elif defined SUPPORT_UNICODE
6448   if (common->utf && is_char7_bitset(start_bits, FALSE))
6449     CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
6450 #endif
6451   OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6452   OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6453   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
6454   if (!HAS_VIRTUAL_REGISTERS)
6455     {
6456     OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
6457     OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP3, 0);
6458     }
6459   else
6460     {
6461     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6462     OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6463     }
6464   JUMPTO(SLJIT_ZERO, start);
6465   }
6466 else
6467   set_jumps(matches, start);
6468 
6469 #if PCRE2_CODE_UNIT_WIDTH != 8
6470 if (found != NULL)
6471   JUMPHERE(found);
6472 #endif
6473 
6474 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6475 
6476 if (common->mode != PCRE2_JIT_COMPLETE)
6477   JUMPHERE(partial_quit);
6478 
6479 if (common->match_end_ptr != 0)
6480   OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
6481 }
6482 
search_requested_char(compiler_common * common,PCRE2_UCHAR req_char,BOOL caseless,BOOL has_firstchar)6483 static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
6484 {
6485 DEFINE_COMPILER;
6486 struct sljit_label *loop;
6487 struct sljit_jump *toolong;
6488 struct sljit_jump *already_found;
6489 struct sljit_jump *found;
6490 struct sljit_jump *found_oc = NULL;
6491 jump_list *not_found = NULL;
6492 sljit_u32 oc, bit;
6493 
6494 SLJIT_ASSERT(common->req_char_ptr != 0);
6495 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
6496 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
6497 toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
6498 already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);
6499 
6500 if (has_firstchar)
6501   OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6502 else
6503   OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
6504 
6505 oc = req_char;
6506 if (caseless)
6507   {
6508   oc = TABLE_GET(req_char, common->fcc, req_char);
6509 #if defined SUPPORT_UNICODE
6510   if (req_char > 127 && (common->utf || common->ucp))
6511     oc = UCD_OTHERCASE(req_char);
6512 #endif
6513   }
6514 
6515 #ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
6516 if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
6517   {
6518   not_found = fast_requested_char_simd(common, req_char, oc);
6519   }
6520 else
6521 #endif
6522   {
6523   loop = LABEL();
6524   add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
6525 
6526   OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
6527 
6528   if (req_char == oc)
6529     found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6530   else
6531     {
6532     bit = req_char ^ oc;
6533     if (is_powerof2(bit))
6534       {
6535        OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
6536       found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
6537       }
6538     else
6539       {
6540       found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6541       found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
6542       }
6543     }
6544   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6545   JUMPTO(SLJIT_JUMP, loop);
6546 
6547   JUMPHERE(found);
6548   if (found_oc)
6549     JUMPHERE(found_oc);
6550   }
6551 
6552 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
6553 
6554 JUMPHERE(already_found);
6555 JUMPHERE(toolong);
6556 return not_found;
6557 }
6558 
static void do_revertframes(compiler_common *common)
{
/* Emits a fast-call routine that pops entries from the backtracking stack and
writes the saved values back into the local area. Each entry is headed by the
word at STACK_TOP[-1]:

  > 0  offset into the local area; two words (STACK_TOP[-2], STACK_TOP[-3])
       are restored to [base + offset] and [base + offset + word], and three
       words are popped.
  < 0  negated offset; one word (STACK_TOP[-2]) is restored to
       [base - value], and two words are popped.
  == 0 end of the entries; the routine returns through RETURN_ADDR.

Uses TMP1 (local base), TMP2 and, without virtual registers, TMP3. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;
/* Signed word size. Negating sizeof() directly would negate an unsigned
size_t and depend on an implementation-defined conversion when the value is
passed as a signed memory offset. */
const sljit_sw word = (sljit_sw)sizeof(sljit_sw);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
GET_LOCAL_BASE(TMP1, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -word);
jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);

/* Positive header: restore two words. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * word));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), word, SLJIT_MEM1(STACK_TOP), -(3 * word));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * word);
  }
else
  {
  /* TMP1 is borrowed as a scratch register here, so the local base is
  reloaded before looping. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * word));
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * word));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * word);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  GET_LOCAL_BASE(TMP1, 0, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), word, TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);

JUMPHERE(jump);
/* TMP2 is <= 0 here, so "not zero" means "negative". */
jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
/* End of reverting values. */
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Negative header: restore a single word. */
JUMPHERE(jump);
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * word));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * word);
  }
else
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * word));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * word);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);
}
6612 
static void check_wordboundary(compiler_common *common)
{
/* Emits a fast-call routine that classifies the character before STR_PTR and
the character at STR_PTR as "word" / "non-word" (0 or 1) and returns the XOR
of the two classifications in TMP2, with the zero flag set from that XOR.
The return address is kept in LOCALS0 and the previous character's
classification in TMP3. A side that cannot be read (STR_PTR at or before the
subject beginning, or the check_str_end() exits) keeps classification 0.
TMP1, TMP2 and TMP3 are overwritten. */
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#ifdef SUPPORT_UNICODE
struct sljit_label *valid_utf;
jump_list *invalid_utf1 = NULL;
#endif /* SUPPORT_UNICODE */
jump_list *invalid_utf2 = NULL;
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
struct sljit_jump *jump;
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */

/* The table lookups below shift the ctypes entry right by 4 to isolate the
word bit, which is only correct while ctype_word is 0x10. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to TMP3. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
/* No previous character at (or before) the subject beginning. */
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* TMP1 and STR_PTR are parked in RETURN_ADDR / TMP2 around the
    move_back() + check_start_used_ptr() pair and restored afterwards. */
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
    }
  }
else
#endif /* SUPPORT_UNICODE */
  {
  if (common->mode == PCRE2_JIT_COMPLETE)
    peek_char_back(common, READ_CHAR_MAX, NULL);
  else
    {
    /* Partial matching: step back, record the start-used pointer, then read
    forward again with STR_PTR updated by the read. */
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
    }
  }

/* Testing char type. */
#ifdef SUPPORT_UNICODE
if (common->ucp)
  {
  /* Underscore counts as a word character (TMP2 preset to 1). Otherwise the
  getucdtype helper is called and its result in TMP1 is range-checked against
  ucp_Ll..ucp_Lu and ucp_Nd..ucp_No. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* Table-driven test: code points above 255 skip the lookup and keep the
  zero already stored in TMP3. */
#if PCRE2_CODE_UNIT_WIDTH != 8
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  /* Here TMP3 has already been zeroed. */
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
JUMPHERE(skipread);

/* Now classify the character at STR_PTR into TMP2 (default 0 when the
check_str_end() exits are taken). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);

/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UNICODE

/* Re-entry point used by the invalid-UTF recovery code at the end of this
function once it has peeked a valid current character. */
valid_utf = LABEL();

if (common->ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  }
else
#endif /* SUPPORT_UNICODE */
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
set_jumps(skipread_list, LABEL());

/* Normal exit: TMP2 = current ^ previous, zero flag set when they agree. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  /* Reached through invalid_utf1 (set by the backward peek): peek the current
  character; if it is not INVALID_UTF_CHAR continue at valid_utf, otherwise
  return with TMP2 = -1. */
  set_jumps(invalid_utf1, LABEL());

  peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

  /* Reached through invalid_utf2 (set by the forward peek): return the
  previous character's classification unchanged in TMP2. */
  set_jumps(invalid_utf2, LABEL());
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
  }
#endif /* SUPPORT_UNICODE */
}
6773 
static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
/* Tries to replace a 256-bit class bitmap test with a few compare
instructions on TMP1. The bitmap is scanned for the positions where the bit
value flips; those positions are collected in ranges[]. When there are at most
four of them, compare-and-jump code is emitted that adds a jump to *backtracks
for characters that are rejected (the sense is flipped when invert is set),
and TRUE is returned. Otherwise nothing is emitted and FALSE is returned.
nclass adds (or suppresses) a final flip at 256, depending on the value of the
last bit. */
DEFINE_COMPILER;
int ranges[MAX_CLASS_RANGE_SIZE];
sljit_u8 bit, cbit, all;
int i, byte, length = 0;

bit = bits[0] & 0x1;
/* All bits will be zero or one (since bit is zero or one). */
all = -bit;

for (i = 0; i < 256; )
  {
  byte = i >> 3;
  /* Fast path: a whole byte that matches the current run is skipped. */
  if ((i & 0x7) == 0 && bits[byte] == all)
    i += 8;
  else
    {
    cbit = (bits[byte] >> (i & 0x7)) & 0x1;
    if (cbit != bit)
      {
      /* Bit value flipped at position i: record the boundary. */
      if (length >= MAX_CLASS_RANGE_SIZE)
        return FALSE;
      ranges[length] = i;
      length++;
      bit = cbit;
      all = -cbit;
      }
    i++;
    }
  }

/* Record a closing boundary at 256 when the last run's value and nclass
call for one. */
if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
  {
  if (length >= MAX_CLASS_RANGE_SIZE)
    return FALSE;
  ranges[length] = 256;
  length++;
  }

/* Only up to four boundaries are handled by the code below. */
if (length < 0 || length > 4)
  return FALSE;

/* From here on, bit is the (possibly inverted) value of the first run. */
bit = bits[0] & 0x1;
if (invert) bit ^= 0x1;

/* No character is accepted. */
if (length == 0 && bit == 0)
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

switch(length)
  {
  case 0:
  /* When bit != 0, all characters are accepted. */
  return TRUE;

  case 1:
  /* Single boundary: one compare against it. */
  add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 2:
  /* One interval [ranges[0], ranges[1]): subtract the base and compare with
  the width, or compare for equality when the interval is a single value. */
  if (ranges[0] + 1 != ranges[1])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
    }
  else
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 3:
  /* Three boundaries: one open-ended test plus one interval test. */
  if (bit != 0)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
    return TRUE;
    }

  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
  return TRUE;

  case 4:
  /* Two intervals of equal width whose bases differ in exactly one bit
  (e.g. upper/lower case letters): OR that bit in and test a single
  interval. */
  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
      && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
      && (ranges[1] & (ranges[2] - ranges[0])) == 0
      && is_powerof2(ranges[2] - ranges[0]))
    {
    SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    return TRUE;
    }

  if (bit != 0)
    {
    /* i tracks how much has already been subtracted from TMP1 by the first
    interval test, so the second test can compensate. */
    i = 0;
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      i = ranges[0];
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));

    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
    return TRUE;
    }

  /* bit == 0: test the outer span [ranges[0], ranges[3]) first, then the
  gap [ranges[1], ranges[2]) relative to the already shifted TMP1. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
  return TRUE;

  default:
  SLJIT_UNREACHABLE();
  return FALSE;
  }
}
6925 
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
/* Tries to test a class bitmap as a short list of individual characters using
conditional moves. The set bits (or the clear bits when nclass is set) are
collected into a list of at most MAX_CLASS_CHARS_SIZE entries; two characters
that differ only in bit 0x20 share one entry, flagged with 0x100. TMP2 ends up
non-zero when TMP1 equals one of the listed characters, and a jump on TMP2 is
added to *backtracks. Returns FALSE without emitting code when conditional
moves are unavailable, the list would overflow, or the list is empty. */
DEFINE_COMPILER;
uint16_t members[MAX_CLASS_CHARS_SIZE];
uint8_t mask;
sljit_s32 type;
int count = 0;
int paired = 0;
int idx, bitpos, slot, ch;

if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  return FALSE;

/* Collect the characters, merging c and c - 0x20 into a single entry. */
for (idx = 0; idx < 32; idx++)
  {
  mask = bits[idx];
  if (nclass)
    mask = ~mask;

  for (bitpos = 0; mask != 0; mask >>= 1, bitpos++)
    {
    if ((mask & 0x1) == 0)
      continue;

    ch = idx * 8 + bitpos;
    slot = count;

    if ((ch & 0x20) != 0)
      {
      for (slot = 0; slot < count; slot++)
        if (members[slot] == ch - 0x20)
          {
          members[slot] |= 0x120;
          break;
          }
      }

    /* Merged into an existing entry: nothing to append. */
    if (slot != count)
      continue;

    if (count >= MAX_CLASS_CHARS_SIZE)
      return FALSE;

    members[count++] = (uint16_t) ch;
    }
  }

if (count == 0) return FALSE;  /* Should never occur, but stops analyzers complaining. */

/* Character zero cannot be signalled by moving TMP1 into TMP2, so it is
handled by materializing the zero flag instead. */
idx = 0;
if (members[0] == 0)
  {
  idx++;
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);

/* Unpaired entries are compared against the unmodified TMP1. */
for (; idx < count; idx++)
  {
  if ((members[idx] & 0x100) != 0)
    {
    paired++;
    continue;
    }

  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, members[idx]);
  CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
  }

/* Paired entries are compared after forcing bit 0x20 on in TMP1. */
if (paired != 0)
  {
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);

  for (idx = 0; idx < count; idx++)
    {
    if ((members[idx] & 0x100) == 0)
      continue;

    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, members[idx] & 0xff);
    CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
    }
  }

/* invert flips the sense selected by nclass. */
if (invert)
  type = nclass ? SLJIT_EQUAL : SLJIT_NOT_EQUAL;
else
  type = nclass ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;

add_jump(compiler, backtracks, CMP(type, TMP2, 0, SLJIT_IMM, 0));
return TRUE;
}
7026 
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
/* Attempts the range-based class test first and falls back to the
character-list test only when the former declines. Returns TRUE when either
of them emitted code. */
BOOL handled = optimize_class_ranges(common, bits, nclass, invert, backtracks);

if (!handled)
  handled = optimize_class_chars(common, bits, nclass, invert, backtracks);
return handled;
}
7034 
static void check_anynewline(compiler_common *common)
{
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
/* Emits a fast-call routine returning through RETURN_ADDR. The result is
accumulated in TMP2 (non-zero when a test matched) and the last OP_FLAGS also
sets the zero flag from it. TMP1 is modified as well. Characters tested:
0x0a..0x0d and 0x85, plus 0x2028/0x2029 when the second block is compiled in
(and, for 8-bit code units, only when common->utf is set). */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Rebase so that 0x0a..0x0d becomes 0..3 and can be tested with one
compare. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting bit 0 folds 0x2028 onto 0x2029, so one compare covers both. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the result of the last compare and set the zero flag from TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7061 
static void check_hspace(compiler_common *common)
{
/* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
/* Emits a fast-call routine returning through RETURN_ADDR. The result is
accumulated in TMP2 (non-zero when a test matched) and the last OP_FLAGS also
sets the zero flag from it. Characters tested: 0x09, 0x20 and 0xa0, plus
0x1680, 0x180e, 0x2000..0x200A, 0x202f, 0x205f and 0x3000 when the second
block is compiled in (and, for 8-bit code units, only when common->utf is
set). TMP1 is modified only in that second block. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Rebase to 0x2000 so the 0x2000..0x200A run needs a single compare; the
  remaining constants are expressed relative to the same base. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the result of the last compare and set the zero flag from TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7100 
static void check_vspace(compiler_common *common)
{
/* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
/* Emits a fast-call routine returning through RETURN_ADDR. The result is
accumulated in TMP2 (non-zero when a test matched) and the last OP_FLAGS also
sets the zero flag from it. TMP1 is modified as well. Characters tested:
0x0a..0x0d and 0x85, plus 0x2028/0x2029 when the second block is compiled in
(and, for 8-bit code units, only when common->utf is set). The emitted
sequence is the same as the one in check_anynewline(). */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Rebase so that 0x0a..0x0d becomes 0..3 and can be tested with one
compare. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting bit 0 folds 0x2028 onto 0x2029, so one compare covers both. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the result of the last compare and set the zero flag from TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7128 
static void do_casefulcmp(compiler_common *common)
{
/* Emits a fast-call routine that compares two code unit sequences exactly.
One sequence is addressed by TMP1; the other by STR_PTR after STR_PTR has
been moved back by TMP2. TMP2 is the remaining length, decremented by
IN_UCHARS(1) per iteration; the loop stops on the first differing pair or
when TMP2 reaches zero. The return address is kept in LOCALS0 and reloaded
into TMP1 before returning. With virtual registers, STR_END and STACK_TOP
serve as the character registers and are preserved in TMP3 / RETURN_ADDR. */
DEFINE_COMPILER;
struct sljit_jump *mismatch;
struct sljit_label *again;
/* Load form used inside the loop: 0 = plain loads followed by explicit
pointer increments, 1 = post-update loads, 2 = pre-update loads. */
int addr_mode = 0;
int char1_reg = HAS_VIRTUAL_REGISTERS ? STR_END : TMP3;
int char2_reg = HAS_VIRTUAL_REGISTERS ? STACK_TOP : RETURN_ADDR;

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

if (char1_reg == STR_END)
  {
  OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
  }

/* Ask which updating load forms are available; post-update is preferred. */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  addr_mode = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  addr_mode = 2;

if (addr_mode == 2)
  {
  /* Pre-update loads advance before reading, so both pointers start one
  code unit early. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

again = LABEL();

switch (addr_mode)
  {
  case 1:
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

mismatch = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, again);

JUMPHERE(mismatch);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

/* Undo the initial bias applied for pre-update loads. */
if (addr_mode == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char1_reg == STR_END)
  {
  OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
  }

OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7208 
static void do_caselesscmp(compiler_common *common)
{
/* Emits a fast-call routine that compares two code unit sequences after
mapping each code unit through the common->lcc table. One sequence is
addressed by TMP1; the other by STR_PTR after STR_PTR has been moved back by
TMP2. TMP2 is the remaining length, decremented by IN_UCHARS(1) per
iteration; the loop stops on the first differing pair or when TMP2 reaches
zero. The return address is kept in LOCALS0 and reloaded into TMP1 before
returning. STR_END is borrowed as a character register and preserved in
LOCALS1. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;
int char1_reg = STR_END;
int char2_reg;
int lcc_table;
/* 0: plain loads, 1: post-update loads, 2: pre-update loads. */
int opt_type = 0;

if (HAS_VIRTUAL_REGISTERS)
  {
  char2_reg = STACK_TOP;
  lcc_table = STACK_LIMIT;
  }
else
  {
  char2_reg = RETURN_ADDR;
  lcc_table = TMP3;
  }

/* Query (SLJIT_MEM_SUPP) which updating load forms are available. */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  opt_type = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  opt_type = 2;

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Save STR_END; it is restored just before returning. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);

if (char2_reg == STACK_TOP)
  {
  /* With virtual registers STACK_TOP and STACK_LIMIT are borrowed too;
  their values are parked in TMP3 and RETURN_ADDR. */
  OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
  }

OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);

if (opt_type == 1)
  {
  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  }
else if (opt_type == 2)
  {
  /* Pre-update loads advance before reading, so both pointers start one
  code unit early. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  }
else
  {
  /* Plain loads: TMP1 is advanced here, STR_PTR after the table lookups
  below. */
  label = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  }

/* Map both code units through the lcc table. For code units wider than
8 bits, values above 255 skip the lookup and are compared as they are. */
#if PCRE2_CODE_UNIT_WIDTH != 8
jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif

if (opt_type == 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);

JUMPHERE(jump);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

/* Undo the initial bias applied for pre-update loads. */
if (opt_type == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char2_reg == STACK_TOP)
  {
  OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
  }

OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7306 
/* Emits the compare code for one character (one or more code units) of a
fixed character sequence starting at cc. The subject data is read relative to
STR_PTR at offset -context->length, and every mismatch jumps to the backtracks
list. When caseless is set and the character has an other case, the differing
bit returned by char_get_othercase_bit() is OR-ed into the subject data before
the comparison. When unaligned reads are available (8 and 16 bit code units),
code units are collected in context and compared together as 16 or 32 bit
values. Returns cc advanced past the code units that were processed. */
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
    compare_context *context, jump_list **backtracks)
{
DEFINE_COMPILER;
unsigned int casebit = 0;
PCRE2_SPTR casepos = NULL;
#ifdef SUPPORT_UNICODE
int units_left;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  casebit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(casebit);
  /* Split the packed value: the upper part selects the code unit that holds
  the differing bit, the low byte is the bit mask itself. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  casepos = cc + (casebit >> 8);
  casebit &= 0xff;
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for PCRE2_CODE_UNIT_WIDTH == 32. */
  casepos = cc + (casebit >> 9);
  /* Bit 0x100 tells that the mask belongs to the upper byte. */
  casebit = ((casebit & 0x100) != 0) ? ((casebit & 0xff) << 8) : (casebit & 0xff);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  }

/* First call for this sequence: preload the first chunk into TMP1. */
if (context->sourcereg == -1)
  {
#if PCRE2_CODE_UNIT_WIDTH == 8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#else
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif
#elif PCRE2_CODE_UNIT_WIDTH == 16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#else
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif
#elif PCRE2_CODE_UNIT_WIDTH == 32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UNICODE
/* In UTF mode a character may consist of several code units. */
units_left = (common->utf && HAS_EXTRALEN(*cc)) ? 1 + GET_EXTRALEN(*cc) : 1;

do
  {
#endif

  context->length -= IN_UCHARS(1);
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)

  /* Unaligned read is supported: collect the expected code unit and its
  case mask, the compare is emitted only when the chunk is complete. */
  if (casebit == 0 || casepos != cc)
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc | casebit;
    context->oc.asuchars[context->ucharptr] = casebit;
    }
  context->ucharptr++;

#if PCRE2_CODE_UNIT_WIDTH == 8
  if (context->length == 0 || context->ucharptr >= 4 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->length == 0 || context->ucharptr >= 2)
#endif
    {
    /* Start loading the next chunk into the current register before the
    other register (loaded earlier) is compared. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if PCRE2_CODE_UNIT_WIDTH == 8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */

    if (context->sourcereg == TMP1)
      context->sourcereg = TMP2;
    else
      context->sourcereg = TMP1;

    if (context->ucharptr == (int)(4 / sizeof(PCRE2_UCHAR)))
      {
      /* 32 bit chunk. */
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      }
    else if (context->ucharptr == (int)(2 / sizeof(PCRE2_UCHAR)))
      {
      /* 16 bit chunk. */
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      }
#if PCRE2_CODE_UNIT_WIDTH == 8
    else if (context->ucharptr == 1)
      {
      /* Single byte chunk. */
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      }
#endif
    else
      {
      SLJIT_UNREACHABLE();
      }
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported or in 32 bit mode: every code unit is
  loaded and compared on its own. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  if (context->sourcereg == TMP1)
    context->sourcereg = TMP2;
  else
    context->sourcereg = TMP1;

  if (casebit == 0 || casepos != cc)
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
  else
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, casebit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | casebit));
    }

#endif

  cc++;
#ifdef SUPPORT_UNICODE
  }
while (--units_left > 0);
#endif

return cc;
}
7459 
7460 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
7461 
/* Changes the bias applied to typereg so that it becomes "value". An ADD or
SUB instruction is emitted only when the bias really changes; typeoffset
records the bias that is currently applied. The do/while(0) wrapper makes the
macro a single statement, so it is safe under an unbraced if/else. Note that
"value" is evaluated more than once. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)

/* Same as SET_TYPE_OFFSET, but for the character value kept in TMP1 and the
bias recorded in charoffset. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
7481 
7482 static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
7483 
compile_xclass_matchingpath(compiler_common * common,PCRE2_SPTR cc,jump_list ** backtracks)7484 static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
7485 {
7486 DEFINE_COMPILER;
7487 jump_list *found = NULL;
7488 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
7489 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
7490 struct sljit_jump *jump = NULL;
7491 PCRE2_SPTR ccbegin;
7492 int compares, invertcmp, numberofcmps;
7493 #if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
7494 BOOL utf = common->utf;
7495 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
7496 
7497 #ifdef SUPPORT_UNICODE
7498 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
7499 BOOL charsaved = FALSE;
7500 int typereg = TMP1;
7501 const sljit_u32 *other_cases;
7502 sljit_uw typeoffset;
7503 #endif /* SUPPORT_UNICODE */
7504 
7505 /* Scanning the necessary info. */
7506 cc++;
7507 ccbegin = cc;
7508 compares = 0;
7509 
7510 if (cc[-1] & XCL_MAP)
7511   {
7512   min = 0;
7513   cc += 32 / sizeof(PCRE2_UCHAR);
7514   }
7515 
7516 while (*cc != XCL_END)
7517   {
7518   compares++;
7519   if (*cc == XCL_SINGLE)
7520     {
7521     cc ++;
7522     GETCHARINCTEST(c, cc);
7523     if (c > max) max = c;
7524     if (c < min) min = c;
7525 #ifdef SUPPORT_UNICODE
7526     needschar = TRUE;
7527 #endif /* SUPPORT_UNICODE */
7528     }
7529   else if (*cc == XCL_RANGE)
7530     {
7531     cc ++;
7532     GETCHARINCTEST(c, cc);
7533     if (c < min) min = c;
7534     GETCHARINCTEST(c, cc);
7535     if (c > max) max = c;
7536 #ifdef SUPPORT_UNICODE
7537     needschar = TRUE;
7538 #endif /* SUPPORT_UNICODE */
7539     }
7540 #ifdef SUPPORT_UNICODE
7541   else
7542     {
7543     SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7544     cc++;
7545     if (*cc == PT_CLIST && cc[-1] == XCL_PROP)
7546       {
7547       other_cases = PRIV(ucd_caseless_sets) + cc[1];
7548       while (*other_cases != NOTACHAR)
7549         {
7550         if (*other_cases > max) max = *other_cases;
7551         if (*other_cases < min) min = *other_cases;
7552         other_cases++;
7553         }
7554       }
7555     else
7556       {
7557       max = READ_CHAR_MAX;
7558       min = 0;
7559       }
7560 
7561     switch(*cc)
7562       {
7563       case PT_ANY:
7564       /* Any either accepts everything or ignored. */
7565       if (cc[-1] == XCL_PROP)
7566         {
7567         compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
7568         if (list == backtracks)
7569           add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7570         return;
7571         }
7572       break;
7573 
7574       case PT_LAMP:
7575       case PT_GC:
7576       case PT_PC:
7577       case PT_ALNUM:
7578       needstype = TRUE;
7579       break;
7580 
7581       case PT_SC:
7582       needsscript = TRUE;
7583       break;
7584 
7585       case PT_SPACE:
7586       case PT_PXSPACE:
7587       case PT_WORD:
7588       case PT_PXGRAPH:
7589       case PT_PXPRINT:
7590       case PT_PXPUNCT:
7591       needstype = TRUE;
7592       needschar = TRUE;
7593       break;
7594 
7595       case PT_CLIST:
7596       case PT_UCNC:
7597       needschar = TRUE;
7598       break;
7599 
7600       default:
7601       SLJIT_UNREACHABLE();
7602       break;
7603       }
7604     cc += 2;
7605     }
7606 #endif /* SUPPORT_UNICODE */
7607   }
7608 SLJIT_ASSERT(compares > 0);
7609 
7610 /* We are not necessary in utf mode even in 8 bit mode. */
7611 cc = ccbegin;
7612 if ((cc[-1] & XCL_NOT) != 0)
7613   read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
7614 else
7615   {
7616 #ifdef SUPPORT_UNICODE
7617   read_char(common, min, max, (needstype || needsscript) ? backtracks : NULL, 0);
7618 #else /* !SUPPORT_UNICODE */
7619   read_char(common, min, max, NULL, 0);
7620 #endif /* SUPPORT_UNICODE */
7621   }
7622 
7623 if ((cc[-1] & XCL_HASPROP) == 0)
7624   {
7625   if ((cc[-1] & XCL_MAP) != 0)
7626     {
7627     jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7628     if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
7629       {
7630       OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7631       OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7632       OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7633       OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7634       OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
7635       add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
7636       }
7637 
7638     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7639     JUMPHERE(jump);
7640 
7641     cc += 32 / sizeof(PCRE2_UCHAR);
7642     }
7643   else
7644     {
7645     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
7646     add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
7647     }
7648   }
7649 else if ((cc[-1] & XCL_MAP) != 0)
7650   {
7651   OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7652 #ifdef SUPPORT_UNICODE
7653   charsaved = TRUE;
7654 #endif /* SUPPORT_UNICODE */
7655   if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
7656     {
7657 #if PCRE2_CODE_UNIT_WIDTH == 8
7658     jump = NULL;
7659     if (common->utf)
7660 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7661       jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7662 
7663     OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7664     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7665     OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7666     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7667     OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
7668     add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
7669 
7670 #if PCRE2_CODE_UNIT_WIDTH == 8
7671     if (common->utf)
7672 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7673       JUMPHERE(jump);
7674     }
7675 
7676   OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7677   cc += 32 / sizeof(PCRE2_UCHAR);
7678   }
7679 
7680 #ifdef SUPPORT_UNICODE
7681 if (needstype || needsscript)
7682   {
7683   if (needschar && !charsaved)
7684     OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7685 
7686 #if PCRE2_CODE_UNIT_WIDTH == 32
7687   if (!common->utf)
7688     {
7689     jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
7690     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
7691     JUMPHERE(jump);
7692     }
7693 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
7694 
7695   OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7696   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
7697   OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
7698   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
7699   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7700   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7701   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
7702   OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
7703 
7704   /* Before anything else, we deal with scripts. */
7705   if (needsscript)
7706     {
7707     OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
7708     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
7709     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7710 
7711     OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
7712 
7713     ccbegin = cc;
7714 
7715     while (*cc != XCL_END)
7716       {
7717       if (*cc == XCL_SINGLE)
7718         {
7719         cc ++;
7720         GETCHARINCTEST(c, cc);
7721         }
7722       else if (*cc == XCL_RANGE)
7723         {
7724         cc ++;
7725         GETCHARINCTEST(c, cc);
7726         GETCHARINCTEST(c, cc);
7727         }
7728       else
7729         {
7730         SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7731         cc++;
7732         if (*cc == PT_SC)
7733           {
7734           compares--;
7735           invertcmp = (compares == 0 && list != backtracks);
7736           if (cc[-1] == XCL_NOTPROP)
7737             invertcmp ^= 0x1;
7738           jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
7739           add_jump(compiler, compares > 0 ? list : backtracks, jump);
7740           }
7741         cc += 2;
7742         }
7743       }
7744 
7745     cc = ccbegin;
7746 
7747     if (needstype)
7748       {
7749       /* TMP2 has already been shifted by 2 */
7750       if (!needschar)
7751         {
7752         OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
7753         OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7754 
7755         OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
7756         }
7757       else
7758         {
7759         OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
7760         OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
7761 
7762         OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7763         OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
7764         typereg = RETURN_ADDR;
7765         }
7766       }
7767     else if (needschar)
7768       OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7769     }
7770   else if (needstype)
7771     {
7772     OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
7773     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
7774 
7775     if (!needschar)
7776       {
7777       OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7778 
7779       OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
7780       }
7781     else
7782       {
7783       OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
7784 
7785       OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7786       OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
7787       typereg = RETURN_ADDR;
7788       }
7789     }
7790   else if (needschar)
7791     OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7792   }
7793 #endif /* SUPPORT_UNICODE */
7794 
7795 /* Generating code. */
7796 charoffset = 0;
7797 numberofcmps = 0;
7798 #ifdef SUPPORT_UNICODE
7799 typeoffset = 0;
7800 #endif /* SUPPORT_UNICODE */
7801 
7802 while (*cc != XCL_END)
7803   {
7804   compares--;
7805   invertcmp = (compares == 0 && list != backtracks);
7806   jump = NULL;
7807 
7808   if (*cc == XCL_SINGLE)
7809     {
7810     cc ++;
7811     GETCHARINCTEST(c, cc);
7812 
7813     if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
7814       {
7815       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7816       OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7817       numberofcmps++;
7818       }
7819     else if (numberofcmps > 0)
7820       {
7821       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7822       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7823       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7824       numberofcmps = 0;
7825       }
7826     else
7827       {
7828       jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7829       numberofcmps = 0;
7830       }
7831     }
7832   else if (*cc == XCL_RANGE)
7833     {
7834     cc ++;
7835     GETCHARINCTEST(c, cc);
7836     SET_CHAR_OFFSET(c);
7837     GETCHARINCTEST(c, cc);
7838 
7839     if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
7840       {
7841       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7842       OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
7843       numberofcmps++;
7844       }
7845     else if (numberofcmps > 0)
7846       {
7847       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7848       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
7849       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7850       numberofcmps = 0;
7851       }
7852     else
7853       {
7854       jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7855       numberofcmps = 0;
7856       }
7857     }
7858 #ifdef SUPPORT_UNICODE
7859   else
7860     {
7861     SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7862     if (*cc == XCL_NOTPROP)
7863       invertcmp ^= 0x1;
7864     cc++;
7865     switch(*cc)
7866       {
7867       case PT_ANY:
7868       if (!invertcmp)
7869         jump = JUMP(SLJIT_JUMP);
7870       break;
7871 
7872       case PT_LAMP:
7873       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
7874       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7875       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
7876       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7877       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
7878       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7879       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7880       break;
7881 
7882       case PT_GC:
7883       c = PRIV(ucp_typerange)[(int)cc[1] * 2];
7884       SET_TYPE_OFFSET(c);
7885       jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
7886       break;
7887 
7888       case PT_PC:
7889       jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
7890       break;
7891 
7892       case PT_SC:
7893       compares++;
7894       /* Do nothing. */
7895       break;
7896 
7897       case PT_SPACE:
7898       case PT_PXSPACE:
7899       SET_CHAR_OFFSET(9);
7900       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
7901       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7902 
7903       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
7904       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7905 
7906       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
7907       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7908 
7909       SET_TYPE_OFFSET(ucp_Zl);
7910       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
7911       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
7912       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7913       break;
7914 
7915       case PT_WORD:
7916       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
7917       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7918       /* Fall through. */
7919 
7920       case PT_ALNUM:
7921       SET_TYPE_OFFSET(ucp_Ll);
7922       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
7923       OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
7924       SET_TYPE_OFFSET(ucp_Nd);
7925       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
7926       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
7927       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7928       break;
7929 
7930       case PT_CLIST:
7931       other_cases = PRIV(ucd_caseless_sets) + cc[1];
7932 
7933       /* At least three characters are required.
7934          Otherwise this case would be handled by the normal code path. */
7935       SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
7936       SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
7937 
7938       /* Optimizing character pairs, if their difference is power of 2. */
7939       if (is_powerof2(other_cases[1] ^ other_cases[0]))
7940         {
7941         if (charoffset == 0)
7942           OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
7943         else
7944           {
7945           OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
7946           OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
7947           }
7948         OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
7949         OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7950         other_cases += 2;
7951         }
7952       else if (is_powerof2(other_cases[2] ^ other_cases[1]))
7953         {
7954         if (charoffset == 0)
7955           OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
7956         else
7957           {
7958           OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
7959           OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
7960           }
7961         OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
7962         OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7963 
7964         OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
7965         OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
7966 
7967         other_cases += 3;
7968         }
7969       else
7970         {
7971         OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
7972         OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7973         }
7974 
7975       while (*other_cases != NOTACHAR)
7976         {
7977         OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
7978         OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
7979         }
7980       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7981       break;
7982 
7983       case PT_UCNC:
7984       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
7985       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7986       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
7987       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7988       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
7989       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7990 
7991       SET_CHAR_OFFSET(0xa0);
7992       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
7993       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
7994       SET_CHAR_OFFSET(0);
7995       OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
7996       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
7997       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7998       break;
7999 
8000       case PT_PXGRAPH:
8001       /* C and Z groups are the farthest two groups. */
8002       SET_TYPE_OFFSET(ucp_Ll);
8003       OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
8004       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
8005 
8006       jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
8007 
8008       /* In case of ucp_Cf, we overwrite the result. */
8009       SET_CHAR_OFFSET(0x2066);
8010       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
8011       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8012 
8013       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
8014       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8015 
8016       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
8017       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8018 
8019       JUMPHERE(jump);
8020       jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
8021       break;
8022 
8023       case PT_PXPRINT:
8024       /* C and Z groups are the farthest two groups. */
8025       SET_TYPE_OFFSET(ucp_Ll);
8026       OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
8027       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
8028 
8029       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
8030       OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);
8031 
8032       jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
8033 
8034       /* In case of ucp_Cf, we overwrite the result. */
8035       SET_CHAR_OFFSET(0x2066);
8036       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
8037       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8038 
8039       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
8040       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8041 
8042       JUMPHERE(jump);
8043       jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
8044       break;
8045 
8046       case PT_PXPUNCT:
8047       SET_TYPE_OFFSET(ucp_Sc);
8048       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
8049       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8050 
8051       SET_CHAR_OFFSET(0);
8052       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
8053       OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
8054 
8055       SET_TYPE_OFFSET(ucp_Pc);
8056       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
8057       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
8058       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8059       break;
8060 
8061       default:
8062       SLJIT_UNREACHABLE();
8063       break;
8064       }
8065     cc += 2;
8066     }
8067 #endif /* SUPPORT_UNICODE */
8068 
8069   if (jump != NULL)
8070     add_jump(compiler, compares > 0 ? list : backtracks, jump);
8071   }
8072 
8073 if (found != NULL)
8074   set_jumps(found, LABEL());
8075 }
8076 
8077 #undef SET_TYPE_OFFSET
8078 #undef SET_CHAR_OFFSET
8079 
8080 #endif
8081 
compile_simple_assertion_matchingpath(compiler_common * common,PCRE2_UCHAR type,PCRE2_SPTR cc,jump_list ** backtracks)8082 static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
8083 {
8084 DEFINE_COMPILER;
8085 int length;
8086 struct sljit_jump *jump[4];
8087 #ifdef SUPPORT_UNICODE
8088 struct sljit_label *label;
8089 #endif /* SUPPORT_UNICODE */
8090 
8091 switch(type)
8092   {
8093   case OP_SOD:
8094   if (HAS_VIRTUAL_REGISTERS)
8095     {
8096     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8097     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8098     }
8099   else
8100     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8101   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
8102   return cc;
8103 
8104   case OP_SOM:
8105   if (HAS_VIRTUAL_REGISTERS)
8106     {
8107     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8108     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
8109     }
8110   else
8111     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
8112   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
8113   return cc;
8114 
8115   case OP_NOT_WORD_BOUNDARY:
8116   case OP_WORD_BOUNDARY:
8117   add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
8118 #ifdef SUPPORT_UNICODE
8119   if (common->invalid_utf)
8120     {
8121     add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8122     return cc;
8123     }
8124 #endif /* SUPPORT_UNICODE */
8125   sljit_set_current_flags(compiler, SLJIT_SET_Z);
8126   add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8127   return cc;
8128 
8129   case OP_EODN:
8130   /* Requires rather complex checks. */
8131   jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
8132   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8133     {
8134     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8135     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8136     if (common->mode == PCRE2_JIT_COMPLETE)
8137       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
8138     else
8139       {
8140       jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
8141       OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
8142       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
8143       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
8144       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
8145       add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
8146       check_partial(common, TRUE);
8147       add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8148       JUMPHERE(jump[1]);
8149       }
8150     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8151     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8152     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8153     }
8154   else if (common->nltype == NLTYPE_FIXED)
8155     {
8156     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8157     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8158     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
8159     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
8160     }
8161   else
8162     {
8163     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8164     jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
8165     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8166     OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
8167     jump[2] = JUMP(SLJIT_GREATER);
8168     add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
8169     /* Equal. */
8170     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8171     jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
8172     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8173 
8174     JUMPHERE(jump[1]);
8175     if (common->nltype == NLTYPE_ANYCRLF)
8176       {
8177       OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8178       add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
8179       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
8180       }
8181     else
8182       {
8183       OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
8184       read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
8185       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
8186       add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
8187       sljit_set_current_flags(compiler, SLJIT_SET_Z);
8188       add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
8189       OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
8190       }
8191     JUMPHERE(jump[2]);
8192     JUMPHERE(jump[3]);
8193     }
8194   JUMPHERE(jump[0]);
8195   if (common->mode != PCRE2_JIT_COMPLETE)
8196     check_partial(common, TRUE);
8197   return cc;
8198 
8199   case OP_EOD:
8200   add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
8201   if (common->mode != PCRE2_JIT_COMPLETE)
8202     check_partial(common, TRUE);
8203   return cc;
8204 
8205   case OP_DOLL:
8206   if (HAS_VIRTUAL_REGISTERS)
8207     {
8208     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8209     OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8210     }
8211   else
8212     OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8213   add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8214 
8215   if (!common->endonly)
8216     compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
8217   else
8218     {
8219     add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
8220     check_partial(common, FALSE);
8221     }
8222   return cc;
8223 
8224   case OP_DOLLM:
8225   jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
8226   if (HAS_VIRTUAL_REGISTERS)
8227     {
8228     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8229     OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8230     }
8231   else
8232     OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8233   add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8234   check_partial(common, FALSE);
8235   jump[0] = JUMP(SLJIT_JUMP);
8236   JUMPHERE(jump[1]);
8237 
8238   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8239     {
8240     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8241     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8242     if (common->mode == PCRE2_JIT_COMPLETE)
8243       add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
8244     else
8245       {
8246       jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
8247       /* STR_PTR = STR_END - IN_UCHARS(1) */
8248       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8249       check_partial(common, TRUE);
8250       add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8251       JUMPHERE(jump[1]);
8252       }
8253 
8254     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8255     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8256     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8257     }
8258   else
8259     {
8260     peek_char(common, common->nlmax, TMP3, 0, NULL);
8261     check_newlinechar(common, common->nltype, backtracks, FALSE);
8262     }
8263   JUMPHERE(jump[0]);
8264   return cc;
8265 
8266   case OP_CIRC:
8267   if (HAS_VIRTUAL_REGISTERS)
8268     {
8269     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8270     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
8271     add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
8272     OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8273     add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8274     }
8275   else
8276     {
8277     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8278     add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
8279     OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8280     add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8281     }
8282   return cc;
8283 
8284   case OP_CIRCM:
8285   /* TMP2 might be used by peek_char_back. */
8286   if (HAS_VIRTUAL_REGISTERS)
8287     {
8288     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8289     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8290     jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
8291     OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8292     }
8293   else
8294     {
8295     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8296     jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
8297     OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8298     }
8299   add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8300   jump[0] = JUMP(SLJIT_JUMP);
8301   JUMPHERE(jump[1]);
8302 
8303   if (!common->alt_circumflex)
8304     add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8305 
8306   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8307     {
8308     OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8309     add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
8310     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
8311     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
8312     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8313     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8314     }
8315   else
8316     {
8317     peek_char_back(common, common->nlmax, backtracks);
8318     check_newlinechar(common, common->nltype, backtracks, FALSE);
8319     }
8320   JUMPHERE(jump[0]);
8321   return cc;
8322 
8323   case OP_REVERSE:
8324   length = GET(cc, 0);
8325   if (length == 0)
8326     return cc + LINK_SIZE;
8327   if (HAS_VIRTUAL_REGISTERS)
8328     {
8329     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8330     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8331     }
8332   else
8333     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8334 #ifdef SUPPORT_UNICODE
8335   if (common->utf)
8336     {
8337     OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, length);
8338     label = LABEL();
8339     add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
8340     move_back(common, backtracks, FALSE);
8341     OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
8342     JUMPTO(SLJIT_NOT_ZERO, label);
8343     }
8344   else
8345 #endif
8346     {
8347     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
8348     add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
8349     }
8350   check_start_used_ptr(common);
8351   return cc + LINK_SIZE;
8352   }
8353 SLJIT_UNREACHABLE();
8354 return cc;
8355 }
8356 
8357 #ifdef SUPPORT_UNICODE
8358 
8359 #if PCRE2_CODE_UNIT_WIDTH != 32
8360 
do_extuni_utf(jit_arguments * args,PCRE2_SPTR cc)8361 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
8362 {
8363 PCRE2_SPTR start_subject = args->begin;
8364 PCRE2_SPTR end_subject = args->end;
8365 int lgb, rgb, ricount;
8366 PCRE2_SPTR prevcc, endcc, bptr;
8367 BOOL first = TRUE;
8368 uint32_t c;
8369 
8370 prevcc = cc;
8371 endcc = NULL;
8372 do
8373   {
8374   GETCHARINC(c, cc);
8375   rgb = UCD_GRAPHBREAK(c);
8376 
8377   if (first)
8378     {
8379     lgb = rgb;
8380     endcc = cc;
8381     first = FALSE;
8382     continue;
8383     }
8384 
8385   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8386     break;
8387 
8388   /* Not breaking between Regional Indicators is allowed only if there
8389   are an even number of preceding RIs. */
8390 
8391   if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8392     {
8393     ricount = 0;
8394     bptr = prevcc;
8395 
8396     /* bptr is pointing to the left-hand character */
8397     while (bptr > start_subject)
8398       {
8399       bptr--;
8400       BACKCHAR(bptr);
8401       GETCHAR(c, bptr);
8402 
8403       if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
8404         break;
8405 
8406       ricount++;
8407       }
8408 
8409     if ((ricount & 1) != 0) break;  /* Grapheme break required */
8410     }
8411 
8412   /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8413   allows any number of them before a following Extended_Pictographic. */
8414 
8415   if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8416        lgb != ucp_gbExtended_Pictographic)
8417     lgb = rgb;
8418 
8419   prevcc = endcc;
8420   endcc = cc;
8421   }
8422 while (cc < end_subject);
8423 
8424 return endcc;
8425 }
8426 
8427 #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
8428 
do_extuni_utf_invalid(jit_arguments * args,PCRE2_SPTR cc)8429 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
8430 {
8431 PCRE2_SPTR start_subject = args->begin;
8432 PCRE2_SPTR end_subject = args->end;
8433 int lgb, rgb, ricount;
8434 PCRE2_SPTR prevcc, endcc, bptr;
8435 BOOL first = TRUE;
8436 uint32_t c;
8437 
8438 prevcc = cc;
8439 endcc = NULL;
8440 do
8441   {
8442   GETCHARINC_INVALID(c, cc, end_subject, break);
8443   rgb = UCD_GRAPHBREAK(c);
8444 
8445   if (first)
8446     {
8447     lgb = rgb;
8448     endcc = cc;
8449     first = FALSE;
8450     continue;
8451     }
8452 
8453   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8454     break;
8455 
8456   /* Not breaking between Regional Indicators is allowed only if there
8457   are an even number of preceding RIs. */
8458 
8459   if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8460     {
8461     ricount = 0;
8462     bptr = prevcc;
8463 
8464     /* bptr is pointing to the left-hand character */
8465     while (bptr > start_subject)
8466       {
8467       GETCHARBACK_INVALID(c, bptr, start_subject, break);
8468 
8469       if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
8470         break;
8471 
8472       ricount++;
8473       }
8474 
8475     if ((ricount & 1) != 0)
8476       break;  /* Grapheme break required */
8477     }
8478 
8479   /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8480   allows any number of them before a following Extended_Pictographic. */
8481 
8482   if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8483        lgb != ucp_gbExtended_Pictographic)
8484     lgb = rgb;
8485 
8486   prevcc = endcc;
8487   endcc = cc;
8488   }
8489 while (cc < end_subject);
8490 
8491 return endcc;
8492 }
8493 
do_extuni_no_utf(jit_arguments * args,PCRE2_SPTR cc)8494 static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
8495 {
8496 PCRE2_SPTR start_subject = args->begin;
8497 PCRE2_SPTR end_subject = args->end;
8498 int lgb, rgb, ricount;
8499 PCRE2_SPTR bptr;
8500 uint32_t c;
8501 
8502 /* Patch by PH */
8503 /* GETCHARINC(c, cc); */
8504 c = *cc++;
8505 
8506 #if PCRE2_CODE_UNIT_WIDTH == 32
8507 if (c >= 0x110000)
8508   return NULL;
8509 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8510 lgb = UCD_GRAPHBREAK(c);
8511 
8512 while (cc < end_subject)
8513   {
8514   c = *cc;
8515 #if PCRE2_CODE_UNIT_WIDTH == 32
8516   if (c >= 0x110000)
8517     break;
8518 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8519   rgb = UCD_GRAPHBREAK(c);
8520 
8521   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8522     break;
8523 
8524   /* Not breaking between Regional Indicators is allowed only if there
8525   are an even number of preceding RIs. */
8526 
8527   if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8528     {
8529     ricount = 0;
8530     bptr = cc - 1;
8531 
8532     /* bptr is pointing to the left-hand character */
8533     while (bptr > start_subject)
8534       {
8535       bptr--;
8536       c = *bptr;
8537 #if PCRE2_CODE_UNIT_WIDTH == 32
8538       if (c >= 0x110000)
8539         break;
8540 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8541 
8542       if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
8543 
8544       ricount++;
8545       }
8546 
8547     if ((ricount & 1) != 0)
8548       break;  /* Grapheme break required */
8549     }
8550 
8551   /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8552   allows any number of them before a following Extended_Pictographic. */
8553 
8554   if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8555        lgb != ucp_gbExtended_Pictographic)
8556     lgb = rgb;
8557 
8558   cc++;
8559   }
8560 
8561 return cc;
8562 }
8563 
8564 #endif /* SUPPORT_UNICODE */
8565 
/* Emits the code shared by the digit, whitespace and word character type
checks and their negations: the optional end-of-subject check, the read of the
character's ctype bits into TMP1, and the backtracking jump taken when the
selected ctype bit does not have the wanted state.

Arguments:
  common         compiler state
  backtracks     list that receives the failure jumps
  check_str_ptr  TRUE to emit detect_partial_match() first
  negated        TRUE for the OP_NOT_xxx form of the check
  cbit_offset    cbit_xxx offset of the class bitmap; used only by the 8-bit
                   Unicode build to decide whether the 7-bit reader is usable
  ctype_mask     ctype_xxx bit tested in the ctype value
*/

static void compile_ctype_matchingpath(compiler_common *common, jump_list **backtracks,
  BOOL check_str_ptr, BOOL negated, int cbit_offset, int ctype_mask)
{
DEFINE_COMPILER;

if (check_str_ptr)
  detect_partial_match(common, backtracks);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_offset, FALSE))
  read_char7_type(common, backtracks, negated);
else
  read_char8_type(common, backtracks, negated);
#else
(void)cbit_offset;
read_char8_type(common, backtracks, negated);
#endif

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_mask);
add_jump(compiler, backtracks, JUMP(negated ? SLJIT_NOT_ZERO : SLJIT_ZERO));
}

/* Emits the matching path for one opcode that matches a single character (or,
for OP_EXTUNI, one grapheme cluster). Every emitted failure jump is appended to
the backtracks list.

Arguments:
  common         compiler state
  type           the opcode to compile
  cc             pattern code following the opcode (its operand data)
  backtracks     list that receives the failure jumps
  check_str_ptr  TRUE when this function has to emit the end-of-subject check
                   itself

Returns:         pointer to the pattern code after the operand data of the
                 opcode
*/

static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
{
DEFINE_COMPILER;
int len;
unsigned int chr, other, mask;
compare_context cmp_ctx;
struct sljit_jump *jmp[3];
jump_list *at_end;
#ifdef SUPPORT_UNICODE
PCRE2_UCHAR prop_class[5];
#endif /* SUPPORT_UNICODE */

switch(type)
  {
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  /* Digits are usually 0-9, so the 7-bit reader is tried where available. */
  compile_ctype_matchingpath(common, backtracks, check_str_ptr, type == OP_NOT_DIGIT, cbit_digit, ctype_digit);
  return cc;

  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  compile_ctype_matchingpath(common, backtracks, check_str_ptr, type == OP_NOT_WHITESPACE, cbit_space, ctype_space);
  return cc;

  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  compile_ctype_matchingpath(common, backtracks, check_str_ptr, type == OP_NOT_WORDCHAR, cbit_word, ctype_word);
  return cc;

  case OP_ANY:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);

  if (common->nltype != NLTYPE_FIXED || common->newline <= 255)
    {
    check_newlinechar(common, common->nltype, backtracks, TRUE);
    return cc;
    }

  /* Fixed two-unit newline. The character just read is compared with the
  high byte of common->newline; only when it is equal, and the subject has
  not ended, is the next code unit compared with the low byte. Backtrack if
  that matches as well. */

  at_end = NULL;
  jmp[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
  if (common->mode == PCRE2_JIT_PARTIAL_HARD)
    check_str_end(common, &at_end);
  else
    add_jump(compiler, &at_end, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
  set_jumps(at_end, LABEL());
  JUMPHERE(jmp[0]);
  return cc;

  case OP_ALLANY:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#ifdef SUPPORT_UNICODE
  if (common->utf && common->invalid_utf)
    {
    read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
    return cc;
    }

#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
  if (common->utf)
    {
    /* Step over the first code unit, then over the units that follow it
    in a multi-unit character. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if PCRE2_CODE_UNIT_WIDTH == 8
    jmp[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif PCRE2_CODE_UNIT_WIDTH == 16
    jmp[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
    JUMPHERE(jmp[0]);
    return cc;
    }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
#endif /* SUPPORT_UNICODE */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

  case OP_ANYBYTE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

#ifdef SUPPORT_UNICODE
  case OP_NOTPROP:
  case OP_PROP:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  /* The property test is compiled as a one-item extended class. */
  prop_class[0] = XCL_HASPROP;
  if (type == OP_PROP)
    prop_class[1] = XCL_PROP;
  else
    prop_class[1] = XCL_NOTPROP;
  prop_class[2] = cc[0];
  prop_class[3] = cc[1];
  prop_class[4] = XCL_END;
  compile_xclass_matchingpath(common, prop_class, backtracks);
  return cc + 2;
#endif

  case OP_ANYNL:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
  jmp[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);

  /* After a CR an immediately following NL is consumed too. The soft partial
  matching case does not need to be handled here. */
  at_end = NULL;
  if (common->mode == PCRE2_JIT_PARTIAL_HARD)
    check_str_end(common, &at_end);
  else
    add_jump(compiler, &at_end, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  jmp[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  jmp[2] = JUMP(SLJIT_JUMP);

  /* Not a CR: run the newline check selected by bsr_nltype. */
  JUMPHERE(jmp[0]);
  check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
  set_jumps(at_end, LABEL());
  JUMPHERE(jmp[1]);
  JUMPHERE(jmp[2]);
  return cc;

  case OP_NOT_HSPACE:
  case OP_HSPACE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  if (type == OP_HSPACE)
    read_char(common, 0x9, 0x3000, NULL, 0);
  else
    read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);

  add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_HSPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_NOT_VSPACE:
  case OP_VSPACE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  if (type == OP_VSPACE)
    read_char(common, 0xa, 0x2029, NULL, 0);
  else
    read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);

  add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_VSPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

#ifdef SUPPORT_UNICODE
  case OP_EXTUNI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  /* The helper receives the match arguments in R0 and the subject pointer
  in R1, and returns the new subject pointer. */
  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);

#if PCRE2_CODE_UNIT_WIDTH != 32
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
    !common->utf ? SLJIT_FUNC_OFFSET(do_extuni_no_utf) :
      (common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_utf)));
  if (common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
#else
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
    common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
  if (!common->utf || common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
#endif

  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

  if (common->mode == PCRE2_JIT_PARTIAL_HARD)
    {
    jmp[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
    /* Since a character was successfully read above, partial matching must
    occur when the end of the subject has been reached. */
    check_partial(common, TRUE);
    JUMPHERE(jmp[0]);
    }
  return cc;
#endif

  case OP_CHAR:
  case OP_CHARI:
  len = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc))
    len += GET_EXTRALEN(*cc);
#endif

  if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
    detect_partial_match(common, backtracks);

  if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
    {
    /* The character can be checked as a fixed code unit sequence. */
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(len));
    if (len > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

    cmp_ctx.length = IN_UCHARS(len);
    cmp_ctx.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
    cmp_ctx.ucharptr = 0;
#endif
    return byte_sequence_compare(common, type == OP_CHARI, cc, &cmp_ctx, backtracks);
    }

  /* Caseless character whose two cases have to be compared as characters. */

#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
    GETCHAR(chr, cc);
    }
  else
#endif
    chr = *cc;

  SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));

  if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  other = char_othercase(common, chr);
  read_char(common, chr < other ? chr : other, chr > other ? chr : other, NULL, 0);

  SLJIT_ASSERT(!is_powerof2(chr ^ other));

  if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
    {
    jmp[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chr);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, other));
    JUMPHERE(jmp[0]);
    }
  else
    {
    /* Replace the other case by chr, then a single comparison is enough. */
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other);
    CMOV(SLJIT_EQUAL, TMP1, SLJIT_IMM, chr);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chr));
    }
  return cc + len;

  case OP_NOT:
  case OP_NOTI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  len = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
#if PCRE2_CODE_UNIT_WIDTH == 8
    chr = *cc;
    if (chr < 128 && !common->invalid_utf)
      {
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
      if (type != OP_NOT && char_has_othercase(common, cc))
        {
        /* Since UTF8 code page is fixed, we know that chr is in [a-z] or [A-Z] range. */
        OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, chr | 0x20));
        }
      else
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chr));

      /* Skip the variable-length character. */
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      jmp[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
      JUMPHERE(jmp[0]);
      return cc + 1;
      }
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
    GETCHARLEN(chr, cc, len);
    }
  else
#endif /* SUPPORT_UNICODE */
    chr = *cc;

  if (type != OP_NOT && char_has_othercase(common, cc))
    {
    other = char_othercase(common, chr);
    read_char(common, chr < other ? chr : other, chr > other ? chr : other, backtracks, READ_CHAR_UPDATE_STR_PTR);
    mask = chr ^ other;
    if (!is_powerof2(mask))
      {
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chr));
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, other));
      }
    else
      {
      /* The cases differ in a single bit: set it and compare once. */
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chr | mask));
      }
    }
  else
    {
    read_char(common, chr, chr, backtracks, READ_CHAR_UPDATE_STR_PTR);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chr));
    }
  return cc + len;

  case OP_CLASS:
  case OP_NCLASS:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  /* mask is the largest character value passed to read_char() as its upper
  bound. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  mask = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
#else
  mask = 255;
#endif
  if (type == OP_CLASS)
    read_char(common, 0, mask, NULL, 0);
  else
    read_char(common, 0, mask, backtracks, READ_CHAR_UPDATE_STR_PTR);

  if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
    return cc + 32 / sizeof(PCRE2_UCHAR);

  /* Characters above the bitmap range fail for OP_CLASS and skip the bitmap
  test for OP_NCLASS. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  jmp[0] = NULL;
  if (common->utf)
    {
    jmp[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, mask);
    if (type == OP_CLASS)
      {
      add_jump(compiler, backtracks, jmp[0]);
      jmp[0] = NULL;
      }
    }
#elif PCRE2_CODE_UNIT_WIDTH != 8
  jmp[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  if (type == OP_CLASS)
    {
    add_jump(compiler, backtracks, jmp[0]);
    jmp[0] = NULL;
    }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

  /* Test bit (TMP1 & 7) of bitmap byte (TMP1 >> 3). */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  if (jmp[0] != NULL)
    JUMPHERE(jmp[0]);
#endif
  return cc + 32 / sizeof(PCRE2_UCHAR);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  case OP_XCLASS:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
  return cc + GET(cc, 0) - 1;
#endif
  }
SLJIT_UNREACHABLE();
return cc;
}
8970 
compile_charn_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,jump_list ** backtracks)8971 static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
8972 {
8973 /* This function consumes at least one input character. */
8974 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
8975 DEFINE_COMPILER;
8976 PCRE2_SPTR ccbegin = cc;
8977 compare_context context;
8978 int size;
8979 
8980 context.length = 0;
8981 do
8982   {
8983   if (cc >= ccend)
8984     break;
8985 
8986   if (*cc == OP_CHAR)
8987     {
8988     size = 1;
8989 #ifdef SUPPORT_UNICODE
8990     if (common->utf && HAS_EXTRALEN(cc[1]))
8991       size += GET_EXTRALEN(cc[1]);
8992 #endif
8993     }
8994   else if (*cc == OP_CHARI)
8995     {
8996     size = 1;
8997 #ifdef SUPPORT_UNICODE
8998     if (common->utf)
8999       {
9000       if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9001         size = 0;
9002       else if (HAS_EXTRALEN(cc[1]))
9003         size += GET_EXTRALEN(cc[1]);
9004       }
9005     else
9006 #endif
9007     if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9008       size = 0;
9009     }
9010   else
9011     size = 0;
9012 
9013   cc += 1 + size;
9014   context.length += IN_UCHARS(size);
9015   }
9016 while (size > 0 && context.length <= 128);
9017 
9018 cc = ccbegin;
9019 if (context.length > 0)
9020   {
9021   /* We have a fixed-length byte sequence. */
9022   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
9023   add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
9024 
9025   context.sourcereg = -1;
9026 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
9027   context.ucharptr = 0;
9028 #endif
9029   do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
9030   return cc;
9031   }
9032 
9033 /* A non-fixed length character will be checked if length == 0. */
9034 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
9035 }
9036 
9037 /* Forward definitions. */
9038 static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
9039 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
9040 
/* Allocates a backtrack record of (size) bytes with sljit_alloc_memory,
zero-fills it, and pushes it on the front of parent's backtrack list.

The expansion site must have 'compiler', 'parent' and 'backtrack' in scope.
If the compiler reports an error after the allocation, the enclosing
function returns 'error' before the record is touched. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
9053 
/* Same as PUSH_BACKTRACK, for use inside functions returning void: on a
compiler error the enclosing function simply returns.

The expansion site must have 'compiler', 'parent' and 'backtrack' in scope. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
9066 
/* Views the local 'backtrack' pointer as a pointer to the given, more
specific backtrack structure type. Requires 'backtrack' to be in scope. */
#define BACKTRACK_AS(type) ((type *)backtrack)
9068 
/* Emits code that picks the capture group used by a duplicate-name back
reference (OP_DNREF / OP_DNREFI).

The opcode carries an index into the name table (GET2(cc, 1)) and the number
of candidate entries (GET2(cc, 1 + IMM2_SIZE)). For every candidate except
the last one, the generated code sets TMP2 to the address of the group's
OVECTOR slot and jumps to the end as soon as the group's start value differs
from OVECTOR(1), i.e. the group is set. The last candidate is always left in
TMP2 when no earlier one was set.

Arguments:
  common      the compiler state
  cc          points to the OP_DNREF / OP_DNREFI opcode
  backtracks  when not NULL (and common->unset_backref is not set), a jump
              is added to this list for the case where the last candidate
              is unset as well

On exit of the generated code TMP2 holds the address of the selected OVECTOR
slot and TMP1 holds the value of OVECTOR(1). */
static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
/* The OVECTOR offset goes to TMP2. */
DEFINE_COMPILER;
int count = GET2(cc, 1 + IMM2_SIZE);
PCRE2_SPTR slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
unsigned int offset;
jump_list *found = NULL;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

/* TMP1 = value stored in an unset group's start slot. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* All candidates but the last: stop searching at the first set group. */
count--;
while (count-- > 0)
  {
  offset = GET2(slot, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
  add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
  slot += common->name_entry_size;
  }

/* Last candidate: selected unconditionally, optionally failing if unset. */
offset = GET2(slot, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
if (backtracks != NULL && !common->unset_backref)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));

set_jumps(found, LABEL());
}
9098 
/* Emits the code that matches one occurrence of a back reference at the
current subject position.

Arguments:
  common      the compiler state
  cc          points to OP_REF / OP_REFI / OP_DNREF / OP_DNREFI
  backtracks  list receiving the jumps taken when the reference fails
  withchecks  when TRUE, emit the "group is unset" test (numbered references
              only, unless common->unset_backref is set) and the "referenced
              string is empty" test
  emptyfail   when TRUE (together with withchecks), an empty referenced
              string is treated as a failure instead of a successful match

For OP_DNREF / OP_DNREFI the generated code expects TMP2 to hold the address
of the selected OVECTOR slot on entry (see compile_dnref_search).

Two code shapes are produced:
  - UTF mode with OP_REFI: an inline loop compares the referenced string and
    the subject character by character using the Unicode other-case data.
  - everything else: the length is computed and the shared casefulcmp /
    caselesscmp routine is called. */
static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;
#if defined SUPPORT_UNICODE
struct sljit_label *loop;
struct sljit_label *caseless_loop;
jump_list *no_match = NULL;
/* Registers borrowed as scratch in the UTF caseless loop. Their previous
contents are saved at common->iref_ptr and restored on every exit. */
int source_reg = COUNT_MATCH;
int source_end_reg = ARGUMENTS;
int char1_reg = STACK_LIMIT;
#endif /* SUPPORT_UNICODE */

/* TMP1 = start of the referenced string. */
if (ref)
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->unset_backref)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
else
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UNICODE
if (common->utf && *cc == OP_REFI)
  {
  SLJIT_ASSERT(common->iref_ptr != 0);

  /* TMP2 = end of the referenced string.
  NOTE(review): *cc == OP_REFI implies ref is TRUE here, so the else arm
  looks unreachable - confirm before relying on it. */
  if (ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  if (withchecks && emptyfail)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));

  /* Save the borrowed registers. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0);

  OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
  OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);

  /* One iteration per character. 'jump' leaves the loop when the whole
  referenced string has been consumed (success); 'partial' leaves it when
  the subject runs out first. */
  loop = LABEL();
  jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
  partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

  /* Read original character. It must be a valid UTF character. */
  /* STR_PTR is temporarily pointed at the referenced string so that
  read_char can be reused; the real subject position is kept in TMP3. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);

  read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);

  OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);

  /* Read second character. */
  read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);

  /* Identical code points: next character. */
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

  /* Keep the subject character in TMP3 across the getucd call. */
  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);

  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));

  /* TMP2 = TMP2 * 4 + TMP2 * 8, i.e. the getucd result scaled by 12, then
  rebased on the ucd_records table (presumably 12 == sizeof(ucd_record) -
  TODO confirm). */
  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));

  /* Subject character + other_case delta equals the reference character:
  next character. */
  OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

  /* A zero caseset means there is no caseless set to search. */
  add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));

  /* Walk the 32-bit entries of the caseless set: an entry equal to the
  reference character matches, a smaller entry continues the scan, and a
  larger entry falls through to no_match. */
  caseless_loop = LABEL();
  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
  OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, char1_reg, 0);
  JUMPTO(SLJIT_EQUAL, loop);
  JUMPTO(SLJIT_LESS, caseless_loop);

  /* Failure exit. In complete mode running out of subject is a plain
  failure as well. */
  set_jumps(no_match, LABEL());
  if (common->mode == PCRE2_JIT_COMPLETE)
    JUMPHERE(partial);

  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

  /* Partial matching modes: end of subject goes through check_partial
  before backtracking. */
  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    JUMPHERE(partial);
    OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
    OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);

    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    }

  /* Success exit: restore the borrowed registers and continue. */
  JUMPHERE(jump);
  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  return;
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* TMP2 = length of the referenced string (end - start); the zero flag
  is set for an empty string. */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  if (withchecks)
    jump = JUMP(SLJIT_ZERO);

  /* Advance STR_PTR by the length first, then verify it is in range. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, partial);

  /* The shared compare routine is expected to leave zero in TMP2 when the
  strings are equal; anything else is a mismatch. */
  add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    nopartial = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* TMP2 -= STR_END - STR_PTR */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    /* Nothing left to compare: go straight to the partial check. */
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }

/* Empty referenced string: either a failure or an immediate success. */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
}
9263 
compile_ref_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9264 static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9265 {
9266 DEFINE_COMPILER;
9267 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9268 backtrack_common *backtrack;
9269 PCRE2_UCHAR type;
9270 int offset = 0;
9271 struct sljit_label *label;
9272 struct sljit_jump *zerolength;
9273 struct sljit_jump *jump = NULL;
9274 PCRE2_SPTR ccbegin = cc;
9275 int min = 0, max = 0;
9276 BOOL minimize;
9277 
9278 PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
9279 
9280 if (ref)
9281   offset = GET2(cc, 1) << 1;
9282 else
9283   cc += IMM2_SIZE;
9284 type = cc[1 + IMM2_SIZE];
9285 
9286 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
9287 minimize = (type & 0x1) != 0;
9288 switch(type)
9289   {
9290   case OP_CRSTAR:
9291   case OP_CRMINSTAR:
9292   min = 0;
9293   max = 0;
9294   cc += 1 + IMM2_SIZE + 1;
9295   break;
9296   case OP_CRPLUS:
9297   case OP_CRMINPLUS:
9298   min = 1;
9299   max = 0;
9300   cc += 1 + IMM2_SIZE + 1;
9301   break;
9302   case OP_CRQUERY:
9303   case OP_CRMINQUERY:
9304   min = 0;
9305   max = 1;
9306   cc += 1 + IMM2_SIZE + 1;
9307   break;
9308   case OP_CRRANGE:
9309   case OP_CRMINRANGE:
9310   min = GET2(cc, 1 + IMM2_SIZE + 1);
9311   max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
9312   cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
9313   break;
9314   default:
9315   SLJIT_UNREACHABLE();
9316   break;
9317   }
9318 
9319 if (!minimize)
9320   {
9321   if (min == 0)
9322     {
9323     allocate_stack(common, 2);
9324     if (ref)
9325       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9326     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9327     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9328     /* Temporary release of STR_PTR. */
9329     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9330     /* Handles both invalid and empty cases. Since the minimum repeat,
9331     is zero the invalid case is basically the same as an empty case. */
9332     if (ref)
9333       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9334     else
9335       {
9336       compile_dnref_search(common, ccbegin, NULL);
9337       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9338       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9339       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9340       }
9341     /* Restore if not zero length. */
9342     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9343     }
9344   else
9345     {
9346     allocate_stack(common, 1);
9347     if (ref)
9348       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9349     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9350     if (ref)
9351       {
9352       add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9353       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9354       }
9355     else
9356       {
9357       compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
9358       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9359       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9360       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9361       }
9362     }
9363 
9364   if (min > 1 || max > 1)
9365     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
9366 
9367   label = LABEL();
9368   if (!ref)
9369     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
9370   compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
9371 
9372   if (min > 1 || max > 1)
9373     {
9374     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
9375     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9376     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
9377     if (min > 1)
9378       CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
9379     if (max > 1)
9380       {
9381       jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
9382       allocate_stack(common, 1);
9383       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9384       JUMPTO(SLJIT_JUMP, label);
9385       JUMPHERE(jump);
9386       }
9387     }
9388 
9389   if (max == 0)
9390     {
9391     /* Includes min > 1 case as well. */
9392     allocate_stack(common, 1);
9393     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9394     JUMPTO(SLJIT_JUMP, label);
9395     }
9396 
9397   JUMPHERE(zerolength);
9398   BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9399 
9400   count_match(common);
9401   return cc;
9402   }
9403 
9404 allocate_stack(common, ref ? 2 : 3);
9405 if (ref)
9406   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9407 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9408 if (type != OP_CRMINSTAR)
9409   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9410 
9411 if (min == 0)
9412   {
9413   /* Handles both invalid and empty cases. Since the minimum repeat,
9414   is zero the invalid case is basically the same as an empty case. */
9415   if (ref)
9416     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9417   else
9418     {
9419     compile_dnref_search(common, ccbegin, NULL);
9420     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9421     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9422     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9423     }
9424   /* Length is non-zero, we can match real repeats. */
9425   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9426   jump = JUMP(SLJIT_JUMP);
9427   }
9428 else
9429   {
9430   if (ref)
9431     {
9432     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9433     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9434     }
9435   else
9436     {
9437     compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
9438     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9439     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9440     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9441     }
9442   }
9443 
9444 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9445 if (max > 0)
9446   add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
9447 
9448 if (!ref)
9449   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9450 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
9451 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9452 
9453 if (min > 1)
9454   {
9455   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9456   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9457   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9458   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
9459   }
9460 else if (max > 0)
9461   OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
9462 
9463 if (jump != NULL)
9464   JUMPHERE(jump);
9465 JUMPHERE(zerolength);
9466 
9467 count_match(common);
9468 return cc;
9469 }
9470 
compile_recurse_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9471 static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9472 {
9473 DEFINE_COMPILER;
9474 backtrack_common *backtrack;
9475 recurse_entry *entry = common->entries;
9476 recurse_entry *prev = NULL;
9477 sljit_sw start = GET(cc, 1);
9478 PCRE2_SPTR start_cc;
9479 BOOL needs_control_head;
9480 
9481 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
9482 
9483 /* Inlining simple patterns. */
9484 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
9485   {
9486   start_cc = common->start + start;
9487   compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
9488   BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
9489   return cc + 1 + LINK_SIZE;
9490   }
9491 
9492 while (entry != NULL)
9493   {
9494   if (entry->start == start)
9495     break;
9496   prev = entry;
9497   entry = entry->next;
9498   }
9499 
9500 if (entry == NULL)
9501   {
9502   entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
9503   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9504     return NULL;
9505   entry->next = NULL;
9506   entry->entry_label = NULL;
9507   entry->backtrack_label = NULL;
9508   entry->entry_calls = NULL;
9509   entry->backtrack_calls = NULL;
9510   entry->start = start;
9511 
9512   if (prev != NULL)
9513     prev->next = entry;
9514   else
9515     common->entries = entry;
9516   }
9517 
9518 BACKTRACK_AS(recurse_backtrack)->entry = entry;
9519 
9520 if (entry->entry_label == NULL)
9521   add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));
9522 else
9523   JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
9524 /* Leave if the match is failed. */
9525 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
9526 BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
9527 return cc + 1 + LINK_SIZE;
9528 }
9529 
do_callout(struct jit_arguments * arguments,pcre2_callout_block * callout_block,PCRE2_SPTR * jit_ovector)9530 static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
9531 {
9532 PCRE2_SPTR begin;
9533 PCRE2_SIZE *ovector;
9534 sljit_u32 oveccount, capture_top;
9535 
9536 if (arguments->callout == NULL)
9537   return 0;
9538 
9539 SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);
9540 
9541 begin = arguments->begin;
9542 ovector = (PCRE2_SIZE*)(callout_block + 1);
9543 oveccount = callout_block->capture_top;
9544 
9545 SLJIT_ASSERT(oveccount >= 1);
9546 
9547 callout_block->version = 2;
9548 callout_block->callout_flags = 0;
9549 
9550 /* Offsets in subject. */
9551 callout_block->subject_length = arguments->end - arguments->begin;
9552 callout_block->start_match = jit_ovector[0] - begin;
9553 callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
9554 callout_block->subject = begin;
9555 
9556 /* Convert and copy the JIT offset vector to the ovector array. */
9557 callout_block->capture_top = 1;
9558 callout_block->offset_vector = ovector;
9559 
9560 ovector[0] = PCRE2_UNSET;
9561 ovector[1] = PCRE2_UNSET;
9562 ovector += 2;
9563 jit_ovector += 2;
9564 capture_top = 1;
9565 
9566 /* Convert pointers to sizes. */
9567 while (--oveccount != 0)
9568   {
9569   capture_top++;
9570 
9571   ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin);
9572   ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin);
9573 
9574   if (ovector[0] != PCRE2_UNSET)
9575     callout_block->capture_top = capture_top;
9576 
9577   ovector += 2;
9578   jit_ovector += 2;
9579   }
9580 
9581 return (arguments->callout)(callout_block, arguments->callout_data);
9582 }
9583 
/* Byte offset of a pcre2_callout_block member, used to address the fields
of the callout block that is built on the JIT stack. */
#define CALLOUT_ARG_OFFSET(arg) \
    SLJIT_OFFSETOF(pcre2_callout_block, arg)
9586 
compile_callout_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9587 static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9588 {
9589 DEFINE_COMPILER;
9590 backtrack_common *backtrack;
9591 sljit_s32 mov_opcode;
9592 unsigned int callout_length = (*cc == OP_CALLOUT)
9593     ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
9594 sljit_sw value1;
9595 sljit_sw value2;
9596 sljit_sw value3;
9597 sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw);
9598 
9599 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9600 
9601 callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);
9602 
9603 allocate_stack(common, callout_arg_size);
9604 
9605 SLJIT_ASSERT(common->capture_last_ptr != 0);
9606 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
9607 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9608 value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
9609 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
9610 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
9611 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
9612 
9613 /* These pointer sized fields temporarly stores internal variables. */
9614 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
9615 
9616 if (common->mark_ptr != 0)
9617   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
9618 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
9619 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
9620 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
9621 
9622 if (*cc == OP_CALLOUT)
9623   {
9624   value1 = 0;
9625   value2 = 0;
9626   value3 = 0;
9627   }
9628 else
9629   {
9630   value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
9631   value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
9632   value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
9633   }
9634 
9635 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
9636 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
9637 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
9638 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
9639 
9640 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
9641 
9642 /* Needed to save important temporary registers. */
9643 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
9644 /* SLJIT_R0 = arguments */
9645 OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
9646 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
9647 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
9648 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
9649 free_stack(common, callout_arg_size);
9650 
9651 /* Check return value. */
9652 OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
9653 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
9654 if (common->abort_label == NULL)
9655   add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
9656 else
9657   JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label);
9658 return cc + callout_length;
9659 }
9660 
/* The callout helper macros are not needed past this point.
NOTE(review): no definition of CALLOUT_ARG_SIZE is visible near the callout
code; undefining an undefined macro is harmless, but confirm whether this
line is a leftover. */
#undef CALLOUT_ARG_SIZE
#undef CALLOUT_ARG_OFFSET
9663 
assert_needs_str_ptr_saving(PCRE2_SPTR cc)9664 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
9665 {
9666 while (TRUE)
9667   {
9668   switch (*cc)
9669     {
9670     case OP_CALLOUT_STR:
9671     cc += GET(cc, 1 + 2*LINK_SIZE);
9672     break;
9673 
9674     case OP_NOT_WORD_BOUNDARY:
9675     case OP_WORD_BOUNDARY:
9676     case OP_CIRC:
9677     case OP_CIRCM:
9678     case OP_DOLL:
9679     case OP_DOLLM:
9680     case OP_CALLOUT:
9681     case OP_ALT:
9682     cc += PRIV(OP_lengths)[*cc];
9683     break;
9684 
9685     case OP_KET:
9686     return FALSE;
9687 
9688     default:
9689     return TRUE;
9690     }
9691   }
9692 }
9693 
compile_assert_matchingpath(compiler_common * common,PCRE2_SPTR cc,assert_backtrack * backtrack,BOOL conditional)9694 static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
9695 {
9696 DEFINE_COMPILER;
9697 int framesize;
9698 int extrasize;
9699 BOOL local_quit_available = FALSE;
9700 BOOL needs_control_head;
9701 int private_data_ptr;
9702 backtrack_common altbacktrack;
9703 PCRE2_SPTR ccbegin;
9704 PCRE2_UCHAR opcode;
9705 PCRE2_UCHAR bra = OP_BRA;
9706 jump_list *tmp = NULL;
9707 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
9708 jump_list **found;
9709 /* Saving previous accept variables. */
9710 BOOL save_local_quit_available = common->local_quit_available;
9711 BOOL save_in_positive_assertion = common->in_positive_assertion;
9712 then_trap_backtrack *save_then_trap = common->then_trap;
9713 struct sljit_label *save_quit_label = common->quit_label;
9714 struct sljit_label *save_accept_label = common->accept_label;
9715 jump_list *save_quit = common->quit;
9716 jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
9717 jump_list *save_accept = common->accept;
9718 struct sljit_jump *jump;
9719 struct sljit_jump *brajump = NULL;
9720 
9721 /* Assert captures then. */
9722 common->then_trap = NULL;
9723 
9724 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
9725   {
9726   SLJIT_ASSERT(!conditional);
9727   bra = *cc;
9728   cc++;
9729   }
9730 private_data_ptr = PRIVATE_DATA(cc);
9731 SLJIT_ASSERT(private_data_ptr != 0);
9732 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
9733 backtrack->framesize = framesize;
9734 backtrack->private_data_ptr = private_data_ptr;
9735 opcode = *cc;
9736 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
9737 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
9738 ccbegin = cc;
9739 cc += GET(cc, 1);
9740 
9741 if (bra == OP_BRAMINZERO)
9742   {
9743   /* This is a braminzero backtrack path. */
9744   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9745   free_stack(common, 1);
9746   brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9747   }
9748 
9749 if (framesize < 0)
9750   {
9751   extrasize = 1;
9752   if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
9753     extrasize = 0;
9754 
9755   if (needs_control_head)
9756     extrasize++;
9757 
9758   if (framesize == no_frame)
9759     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
9760 
9761   if (extrasize > 0)
9762     allocate_stack(common, extrasize);
9763 
9764   if (needs_control_head)
9765     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9766 
9767   if (extrasize > 0)
9768     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9769 
9770   if (needs_control_head)
9771     {
9772     SLJIT_ASSERT(extrasize == 2);
9773     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9774     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9775     }
9776   }
9777 else
9778   {
9779   extrasize = needs_control_head ? 3 : 2;
9780   allocate_stack(common, framesize + extrasize);
9781 
9782   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9783   OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
9784   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
9785   if (needs_control_head)
9786     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9787   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9788 
9789   if (needs_control_head)
9790     {
9791     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
9792     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
9793     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9794     }
9795   else
9796     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9797 
9798   init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
9799   }
9800 
9801 memset(&altbacktrack, 0, sizeof(backtrack_common));
9802 if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
9803   {
9804   /* Control verbs cannot escape from these asserts. */
9805   local_quit_available = TRUE;
9806   common->local_quit_available = TRUE;
9807   common->quit_label = NULL;
9808   common->quit = NULL;
9809   }
9810 
9811 common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
9812 common->positive_assertion_quit = NULL;
9813 
9814 while (1)
9815   {
9816   common->accept_label = NULL;
9817   common->accept = NULL;
9818   altbacktrack.top = NULL;
9819   altbacktrack.topbacktracks = NULL;
9820 
9821   if (*ccbegin == OP_ALT && extrasize > 0)
9822     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9823 
9824   altbacktrack.cc = ccbegin;
9825   compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
9826   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9827     {
9828     if (local_quit_available)
9829       {
9830       common->local_quit_available = save_local_quit_available;
9831       common->quit_label = save_quit_label;
9832       common->quit = save_quit;
9833       }
9834     common->in_positive_assertion = save_in_positive_assertion;
9835     common->then_trap = save_then_trap;
9836     common->accept_label = save_accept_label;
9837     common->positive_assertion_quit = save_positive_assertion_quit;
9838     common->accept = save_accept;
9839     return NULL;
9840     }
9841   common->accept_label = LABEL();
9842   if (common->accept != NULL)
9843     set_jumps(common->accept, common->accept_label);
9844 
9845   /* Reset stack. */
9846   if (framesize < 0)
9847     {
9848     if (framesize == no_frame)
9849       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9850     else if (extrasize > 0)
9851       free_stack(common, extrasize);
9852 
9853     if (needs_control_head)
9854       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
9855     }
9856   else
9857     {
9858     if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
9859       {
9860       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9861       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
9862       if (needs_control_head)
9863         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
9864       }
9865     else
9866       {
9867       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9868       if (needs_control_head)
9869         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
9870       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9871       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
9872       }
9873     }
9874 
9875   if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
9876     {
9877     /* We know that STR_PTR was stored on the top of the stack. */
9878     if (conditional)
9879       {
9880       if (extrasize > 0)
9881         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
9882       }
9883     else if (bra == OP_BRAZERO)
9884       {
9885       if (framesize < 0)
9886         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
9887       else
9888         {
9889         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
9890         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
9891         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9892         }
9893       OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9894       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9895       }
9896     else if (framesize >= 0)
9897       {
9898       /* For OP_BRA and OP_BRAMINZERO. */
9899       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
9900       }
9901     }
9902   add_jump(compiler, found, JUMP(SLJIT_JUMP));
9903 
9904   compile_backtrackingpath(common, altbacktrack.top);
9905   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9906     {
9907     if (local_quit_available)
9908       {
9909       common->local_quit_available = save_local_quit_available;
9910       common->quit_label = save_quit_label;
9911       common->quit = save_quit;
9912       }
9913     common->in_positive_assertion = save_in_positive_assertion;
9914     common->then_trap = save_then_trap;
9915     common->accept_label = save_accept_label;
9916     common->positive_assertion_quit = save_positive_assertion_quit;
9917     common->accept = save_accept;
9918     return NULL;
9919     }
9920   set_jumps(altbacktrack.topbacktracks, LABEL());
9921 
9922   if (*cc != OP_ALT)
9923     break;
9924 
9925   ccbegin = cc;
9926   cc += GET(cc, 1);
9927   }
9928 
9929 if (local_quit_available)
9930   {
9931   SLJIT_ASSERT(common->positive_assertion_quit == NULL);
9932   /* Makes the check less complicated below. */
9933   common->positive_assertion_quit = common->quit;
9934   }
9935 
9936 /* None of them matched. */
9937 if (common->positive_assertion_quit != NULL)
9938   {
9939   jump = JUMP(SLJIT_JUMP);
9940   set_jumps(common->positive_assertion_quit, LABEL());
9941   SLJIT_ASSERT(framesize != no_stack);
9942   if (framesize < 0)
9943     OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
9944   else
9945     {
9946     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9947     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9948     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
9949     }
9950   JUMPHERE(jump);
9951   }
9952 
9953 if (needs_control_head)
9954   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
9955 
9956 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
9957   {
9958   /* Assert is failed. */
9959   if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
9960     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9961 
9962   if (framesize < 0)
9963     {
9964     /* The topmost item should be 0. */
9965     if (bra == OP_BRAZERO)
9966       {
9967       if (extrasize == 2)
9968         free_stack(common, 1);
9969       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9970       }
9971     else if (extrasize > 0)
9972       free_stack(common, extrasize);
9973     }
9974   else
9975     {
9976     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
9977     /* The topmost item should be 0. */
9978     if (bra == OP_BRAZERO)
9979       {
9980       free_stack(common, framesize + extrasize - 1);
9981       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9982       }
9983     else
9984       free_stack(common, framesize + extrasize);
9985     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9986     }
9987   jump = JUMP(SLJIT_JUMP);
9988   if (bra != OP_BRAZERO)
9989     add_jump(compiler, target, jump);
9990 
9991   /* Assert is successful. */
9992   set_jumps(tmp, LABEL());
9993   if (framesize < 0)
9994     {
9995     /* We know that STR_PTR was stored on the top of the stack. */
9996     if (extrasize > 0)
9997       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
9998 
9999     /* Keep the STR_PTR on the top of the stack. */
10000     if (bra == OP_BRAZERO)
10001       {
10002       OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
10003       if (extrasize == 2)
10004         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10005       }
10006     else if (bra == OP_BRAMINZERO)
10007       {
10008       OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
10009       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10010       }
10011     }
10012   else
10013     {
10014     if (bra == OP_BRA)
10015       {
10016       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
10017       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
10018       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
10019       }
10020     else
10021       {
10022       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
10023       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
10024       if (extrasize == 2)
10025         {
10026         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10027         if (bra == OP_BRAMINZERO)
10028           OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10029         }
10030       else
10031         {
10032         SLJIT_ASSERT(extrasize == 3);
10033         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10034         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
10035         }
10036       }
10037     }
10038 
10039   if (bra == OP_BRAZERO)
10040     {
10041     backtrack->matchingpath = LABEL();
10042     SET_LABEL(jump, backtrack->matchingpath);
10043     }
10044   else if (bra == OP_BRAMINZERO)
10045     {
10046     JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
10047     JUMPHERE(brajump);
10048     if (framesize >= 0)
10049       {
10050       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10051       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10052       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
10053       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
10054       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10055       }
10056     set_jumps(backtrack->common.topbacktracks, LABEL());
10057     }
10058   }
10059 else
10060   {
10061   /* AssertNot is successful. */
10062   if (framesize < 0)
10063     {
10064     if (extrasize > 0)
10065       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10066 
10067     if (bra != OP_BRA)
10068       {
10069       if (extrasize == 2)
10070         free_stack(common, 1);
10071       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10072       }
10073     else if (extrasize > 0)
10074       free_stack(common, extrasize);
10075     }
10076   else
10077     {
10078     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10079     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
10080     /* The topmost item should be 0. */
10081     if (bra != OP_BRA)
10082       {
10083       free_stack(common, framesize + extrasize - 1);
10084       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10085       }
10086     else
10087       free_stack(common, framesize + extrasize);
10088     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10089     }
10090 
10091   if (bra == OP_BRAZERO)
10092     backtrack->matchingpath = LABEL();
10093   else if (bra == OP_BRAMINZERO)
10094     {
10095     JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
10096     JUMPHERE(brajump);
10097     }
10098 
10099   if (bra != OP_BRA)
10100     {
10101     SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
10102     set_jumps(backtrack->common.topbacktracks, LABEL());
10103     backtrack->common.topbacktracks = NULL;
10104     }
10105   }
10106 
10107 if (local_quit_available)
10108   {
10109   common->local_quit_available = save_local_quit_available;
10110   common->quit_label = save_quit_label;
10111   common->quit = save_quit;
10112   }
10113 common->in_positive_assertion = save_in_positive_assertion;
10114 common->then_trap = save_then_trap;
10115 common->accept_label = save_accept_label;
10116 common->positive_assertion_quit = save_positive_assertion_quit;
10117 common->accept = save_accept;
10118 return cc + 1 + LINK_SIZE;
10119 }
10120 
match_once_common(compiler_common * common,PCRE2_UCHAR ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)10121 static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
10122 {
10123 DEFINE_COMPILER;
10124 int stacksize;
10125 
10126 if (framesize < 0)
10127   {
10128   if (framesize == no_frame)
10129     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10130   else
10131     {
10132     stacksize = needs_control_head ? 1 : 0;
10133     if (ket != OP_KET || has_alternatives)
10134       stacksize++;
10135 
10136     if (stacksize > 0)
10137       free_stack(common, stacksize);
10138     }
10139 
10140   if (needs_control_head)
10141     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
10142 
10143   /* TMP2 which is set here used by OP_KETRMAX below. */
10144   if (ket == OP_KETRMAX)
10145     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10146   else if (ket == OP_KETRMIN)
10147     {
10148     /* Move the STR_PTR to the private_data_ptr. */
10149     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10150     }
10151   }
10152 else
10153   {
10154   stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
10155   OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
10156   if (needs_control_head)
10157     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10158 
10159   if (ket == OP_KETRMAX)
10160     {
10161     /* TMP2 which is set here used by OP_KETRMAX below. */
10162     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10163     }
10164   }
10165 if (needs_control_head)
10166   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10167 }
10168 
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)10169 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
10170 {
10171 DEFINE_COMPILER;
10172 
10173 if (common->capture_last_ptr != 0)
10174   {
10175   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
10176   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10177   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10178   stacksize++;
10179   }
10180 if (common->optimized_cbracket[offset >> 1] == 0)
10181   {
10182   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
10183   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
10184   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10185   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10186   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10187   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10188   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10189   stacksize += 2;
10190   }
10191 return stacksize;
10192 }
10193 
do_script_run(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10194 static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10195 {
10196   if (PRIV(script_run)(ptr, endptr, FALSE))
10197     return endptr;
10198   return NULL;
10199 }
10200 
10201 #ifdef SUPPORT_UNICODE
10202 
do_script_run_utf(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10203 static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10204 {
10205   if (PRIV(script_run)(ptr, endptr, TRUE))
10206     return endptr;
10207   return NULL;
10208 }
10209 
10210 #endif /* SUPPORT_UNICODE */
10211 
match_script_run_common(compiler_common * common,int private_data_ptr,backtrack_common * parent)10212 static SLJIT_INLINE void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
10213 {
10214 DEFINE_COMPILER;
10215 
10216 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
10217 
10218 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10219 #ifdef SUPPORT_UNICODE
10220 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
10221   common->utf ? SLJIT_FUNC_OFFSET(do_script_run_utf) : SLJIT_FUNC_OFFSET(do_script_run));
10222 #else
10223 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_script_run));
10224 #endif
10225 
10226 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
10227 add_jump(compiler, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
10228 }
10229 
10230 /*
10231   Handling bracketed expressions is probably the most complex part.
10232 
10233   Stack layout naming characters:
10234     S - Push the current STR_PTR
10235     0 - Push a 0 (NULL)
10236     A - Push the current STR_PTR. Needed for restoring the STR_PTR
10237         before the next alternative. Not pushed if there are no alternatives.
10238     M - Any values pushed by the current alternative. Can be empty, or anything.
10239     C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
10240     L - Push the previous local (pointed to by localptr) to the stack
10241    () - optional values stored on the stack
10242   ()* - optional, can be stored multiple times
10243 
10244   The following list shows the regular expression templates, their PCRE byte codes
10245   and stack layout supported by pcre-sljit.
10246 
10247   (?:)                     OP_BRA     | OP_KET                A M
10248   ()                       OP_CBRA    | OP_KET                C M
10249   (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
10250                            OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
10251   (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
10252                            OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
10253   ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
10254                            OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
10255   ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
10256                            OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
10257   (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
10258   (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
10259   ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
10260   ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
10261   (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
10262            OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
10263   (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
10264            OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
10265   ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
10266            OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
10267   ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
10268            OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*
10269 
10270 
10271   Stack layout naming characters:
10272     A - Push the alternative index (starting from 0) on the stack.
10273         Not pushed if there are no alternatives.
10274     M - Any values pushed by the current alternative. Can be empty, or anything.
10275 
10276   The next list shows the possible content of a bracket:
10277   (|)     OP_*BRA    | OP_ALT ...         M A
10278   (?()|)  OP_*COND   | OP_ALT             M A
10279   (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
10280                                           Or nothing, if trace is unnecessary
10281 */
10282 
compile_bracket_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)10283 static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10284 {
10285 DEFINE_COMPILER;
10286 backtrack_common *backtrack;
10287 PCRE2_UCHAR opcode;
10288 int private_data_ptr = 0;
10289 int offset = 0;
10290 int i, stacksize;
10291 int repeat_ptr = 0, repeat_length = 0;
10292 int repeat_type = 0, repeat_count = 0;
10293 PCRE2_SPTR ccbegin;
10294 PCRE2_SPTR matchingpath;
10295 PCRE2_SPTR slot;
10296 PCRE2_UCHAR bra = OP_BRA;
10297 PCRE2_UCHAR ket;
10298 assert_backtrack *assert;
10299 BOOL has_alternatives;
10300 BOOL needs_control_head = FALSE;
10301 struct sljit_jump *jump;
10302 struct sljit_jump *skip;
10303 struct sljit_label *rmax_label = NULL;
10304 struct sljit_jump *braminzero = NULL;
10305 
10306 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
10307 
10308 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
10309   {
10310   bra = *cc;
10311   cc++;
10312   opcode = *cc;
10313   }
10314 
10315 opcode = *cc;
10316 ccbegin = cc;
10317 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
10318 ket = *matchingpath;
10319 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
10320   {
10321   repeat_ptr = PRIVATE_DATA(matchingpath);
10322   repeat_length = PRIVATE_DATA(matchingpath + 1);
10323   repeat_type = PRIVATE_DATA(matchingpath + 2);
10324   repeat_count = PRIVATE_DATA(matchingpath + 3);
10325   SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
10326   if (repeat_type == OP_UPTO)
10327     ket = OP_KETRMAX;
10328   if (repeat_type == OP_MINUPTO)
10329     ket = OP_KETRMIN;
10330   }
10331 
10332 matchingpath = ccbegin + 1 + LINK_SIZE;
10333 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
10334 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
10335 cc += GET(cc, 1);
10336 
10337 has_alternatives = *cc == OP_ALT;
10338 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
10339   {
10340   SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
10341     compile_time_checks_must_be_grouped_together);
10342   has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
10343   }
10344 
10345 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
10346   opcode = OP_SCOND;
10347 
10348 if (opcode == OP_CBRA || opcode == OP_SCBRA)
10349   {
10350   /* Capturing brackets has a pre-allocated space. */
10351   offset = GET2(ccbegin, 1 + LINK_SIZE);
10352   if (common->optimized_cbracket[offset] == 0)
10353     {
10354     private_data_ptr = OVECTOR_PRIV(offset);
10355     offset <<= 1;
10356     }
10357   else
10358     {
10359     offset <<= 1;
10360     private_data_ptr = OVECTOR(offset);
10361     }
10362   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10363   matchingpath += IMM2_SIZE;
10364   }
10365 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10366   {
10367   /* Other brackets simply allocate the next entry. */
10368   private_data_ptr = PRIVATE_DATA(ccbegin);
10369   SLJIT_ASSERT(private_data_ptr != 0);
10370   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10371   if (opcode == OP_ONCE)
10372     BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
10373   }
10374 
10375 /* Instructions before the first alternative. */
10376 stacksize = 0;
10377 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10378   stacksize++;
10379 if (bra == OP_BRAZERO)
10380   stacksize++;
10381 
10382 if (stacksize > 0)
10383   allocate_stack(common, stacksize);
10384 
10385 stacksize = 0;
10386 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10387   {
10388   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10389   stacksize++;
10390   }
10391 
10392 if (bra == OP_BRAZERO)
10393   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10394 
10395 if (bra == OP_BRAMINZERO)
10396   {
10397   /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
10398   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10399   if (ket != OP_KETRMIN)
10400     {
10401     free_stack(common, 1);
10402     braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10403     }
10404   else if (opcode == OP_ONCE || opcode >= OP_SBRA)
10405     {
10406     jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10407     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10408     /* Nothing stored during the first run. */
10409     skip = JUMP(SLJIT_JUMP);
10410     JUMPHERE(jump);
10411     /* Checking zero-length iteration. */
10412     if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10413       {
10414       /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
10415       braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10416       }
10417     else
10418       {
10419       /* Except when the whole stack frame must be saved. */
10420       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10421       braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
10422       }
10423     JUMPHERE(skip);
10424     }
10425   else
10426     {
10427     jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10428     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10429     JUMPHERE(jump);
10430     }
10431   }
10432 
10433 if (repeat_type != 0)
10434   {
10435   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
10436   if (repeat_type == OP_EXACT)
10437     rmax_label = LABEL();
10438   }
10439 
10440 if (ket == OP_KETRMIN)
10441   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10442 
10443 if (ket == OP_KETRMAX)
10444   {
10445   rmax_label = LABEL();
10446   if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
10447     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
10448   }
10449 
10450 /* Handling capturing brackets and alternatives. */
10451 if (opcode == OP_ONCE)
10452   {
10453   stacksize = 0;
10454   if (needs_control_head)
10455     {
10456     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10457     stacksize++;
10458     }
10459 
10460   if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10461     {
10462     /* Neither capturing brackets nor recursions are found in the block. */
10463     if (ket == OP_KETRMIN)
10464       {
10465       stacksize += 2;
10466       if (!needs_control_head)
10467         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10468       }
10469     else
10470       {
10471       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10472         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
10473       if (ket == OP_KETRMAX || has_alternatives)
10474         stacksize++;
10475       }
10476 
10477     if (stacksize > 0)
10478       allocate_stack(common, stacksize);
10479 
10480     stacksize = 0;
10481     if (needs_control_head)
10482       {
10483       stacksize++;
10484       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10485       }
10486 
10487     if (ket == OP_KETRMIN)
10488       {
10489       if (needs_control_head)
10490         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10491       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10492       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10493         OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
10494       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10495       }
10496     else if (ket == OP_KETRMAX || has_alternatives)
10497       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10498     }
10499   else
10500     {
10501     if (ket != OP_KET || has_alternatives)
10502       stacksize++;
10503 
10504     stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
10505     allocate_stack(common, stacksize);
10506 
10507     if (needs_control_head)
10508       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10509 
10510     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10511     OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10512 
10513     stacksize = needs_control_head ? 1 : 0;
10514     if (ket != OP_KET || has_alternatives)
10515       {
10516       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10517       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10518       stacksize++;
10519       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10520       }
10521     else
10522       {
10523       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10524       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10525       }
10526     init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
10527     }
10528   }
10529 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
10530   {
10531   /* Saving the previous values. */
10532   if (common->optimized_cbracket[offset >> 1] != 0)
10533     {
10534     SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
10535     allocate_stack(common, 2);
10536     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10537     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
10538     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10539     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
10540     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
10541     }
10542   else
10543     {
10544     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10545     allocate_stack(common, 1);
10546     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10547     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10548     }
10549   }
10550 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10551   {
10552   /* Saving the previous value. */
10553   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10554   allocate_stack(common, 1);
10555   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10556   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10557   }
10558 else if (has_alternatives)
10559   {
10560   /* Pushing the starting string pointer. */
10561   allocate_stack(common, 1);
10562   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10563   }
10564 
10565 /* Generating code for the first alternative. */
10566 if (opcode == OP_COND || opcode == OP_SCOND)
10567   {
10568   if (*matchingpath == OP_CREF)
10569     {
10570     SLJIT_ASSERT(has_alternatives);
10571     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
10572       CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
10573     matchingpath += 1 + IMM2_SIZE;
10574     }
10575   else if (*matchingpath == OP_DNCREF)
10576     {
10577     SLJIT_ASSERT(has_alternatives);
10578 
10579     i = GET2(matchingpath, 1 + IMM2_SIZE);
10580     slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10581     OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
10582     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
10583     OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10584     slot += common->name_entry_size;
10585     i--;
10586     while (i-- > 0)
10587       {
10588       OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10589       OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
10590       slot += common->name_entry_size;
10591       }
10592     OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
10593     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
10594     matchingpath += 1 + 2 * IMM2_SIZE;
10595     }
10596   else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
10597     {
10598     /* Never has other case. */
10599     BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
10600     SLJIT_ASSERT(!has_alternatives);
10601 
10602     if (*matchingpath == OP_TRUE)
10603       {
10604       stacksize = 1;
10605       matchingpath++;
10606       }
10607     else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
10608       stacksize = 0;
10609     else if (*matchingpath == OP_RREF)
10610       {
10611       stacksize = GET2(matchingpath, 1);
10612       if (common->currententry == NULL)
10613         stacksize = 0;
10614       else if (stacksize == RREF_ANY)
10615         stacksize = 1;
10616       else if (common->currententry->start == 0)
10617         stacksize = stacksize == 0;
10618       else
10619         stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10620 
10621       if (stacksize != 0)
10622         matchingpath += 1 + IMM2_SIZE;
10623       }
10624     else
10625       {
10626       if (common->currententry == NULL || common->currententry->start == 0)
10627         stacksize = 0;
10628       else
10629         {
10630         stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
10631         slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10632         i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10633         while (stacksize > 0)
10634           {
10635           if ((int)GET2(slot, 0) == i)
10636             break;
10637           slot += common->name_entry_size;
10638           stacksize--;
10639           }
10640         }
10641 
10642       if (stacksize != 0)
10643         matchingpath += 1 + 2 * IMM2_SIZE;
10644       }
10645 
10646       /* The stacksize == 0 is a common "else" case. */
10647       if (stacksize == 0)
10648         {
10649         if (*cc == OP_ALT)
10650           {
10651           matchingpath = cc + 1 + LINK_SIZE;
10652           cc += GET(cc, 1);
10653           }
10654         else
10655           matchingpath = cc;
10656         }
10657     }
10658   else
10659     {
10660     SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
10661     /* Similar code as PUSH_BACKTRACK macro. */
10662     assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
10663     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10664       return NULL;
10665     memset(assert, 0, sizeof(assert_backtrack));
10666     assert->common.cc = matchingpath;
10667     BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
10668     matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
10669     }
10670   }
10671 
10672 compile_matchingpath(common, matchingpath, cc, backtrack);
10673 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10674   return NULL;
10675 
10676 if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
10677   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10678 
10679 if (opcode == OP_ONCE)
10680   match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
10681 
10682 if (opcode == OP_SCRIPT_RUN)
10683   match_script_run_common(common, private_data_ptr, backtrack);
10684 
10685 stacksize = 0;
10686 if (repeat_type == OP_MINUPTO)
10687   {
10688   /* We need to preserve the counter. TMP2 will be used below. */
10689   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10690   stacksize++;
10691   }
10692 if (ket != OP_KET || bra != OP_BRA)
10693   stacksize++;
10694 if (offset != 0)
10695   {
10696   if (common->capture_last_ptr != 0)
10697     stacksize++;
10698   if (common->optimized_cbracket[offset >> 1] == 0)
10699     stacksize += 2;
10700   }
10701 if (has_alternatives && opcode != OP_ONCE)
10702   stacksize++;
10703 
10704 if (stacksize > 0)
10705   allocate_stack(common, stacksize);
10706 
10707 stacksize = 0;
10708 if (repeat_type == OP_MINUPTO)
10709   {
10710   /* TMP2 was set above. */
10711   OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
10712   stacksize++;
10713   }
10714 
10715 if (ket != OP_KET || bra != OP_BRA)
10716   {
10717   if (ket != OP_KET)
10718     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10719   else
10720     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10721   stacksize++;
10722   }
10723 
10724 if (offset != 0)
10725   stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
10726 
10727 /* Skip and count the other alternatives. */
10728 i = 1;
10729 while (*cc == OP_ALT)
10730   {
10731   cc += GET(cc, 1);
10732   i++;
10733   }
10734 
10735 if (has_alternatives)
10736   {
10737   if (opcode != OP_ONCE)
10738     {
10739     if (i <= 3)
10740       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10741     else
10742       BACKTRACK_AS(bracket_backtrack)->u.matching_put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
10743     }
10744   if (ket != OP_KETRMAX)
10745     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10746   }
10747 
10748 /* Must be after the matchingpath label. */
10749 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
10750   {
10751   SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
10752   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10753   }
10754 
10755 if (ket == OP_KETRMAX)
10756   {
10757   if (repeat_type != 0)
10758     {
10759     if (has_alternatives)
10760       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10761     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10762     JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10763     /* Drop STR_PTR for greedy plus quantifier. */
10764     if (opcode != OP_ONCE)
10765       free_stack(common, 1);
10766     }
10767   else if (opcode < OP_BRA || opcode >= OP_SBRA)
10768     {
10769     if (has_alternatives)
10770       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10771 
10772     /* Checking zero-length iteration. */
10773     if (opcode != OP_ONCE)
10774       {
10775       /* This case includes opcodes such as OP_SCRIPT_RUN. */
10776       CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
10777       /* Drop STR_PTR for greedy plus quantifier. */
10778       if (bra != OP_BRAZERO)
10779         free_stack(common, 1);
10780       }
10781     else
10782       /* TMP2 must contain the starting STR_PTR. */
10783       CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
10784     }
10785   else
10786     JUMPTO(SLJIT_JUMP, rmax_label);
10787   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10788   }
10789 
10790 if (repeat_type == OP_EXACT)
10791   {
10792   count_match(common);
10793   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10794   JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10795   }
10796 else if (repeat_type == OP_UPTO)
10797   {
10798   /* We need to preserve the counter. */
10799   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10800   allocate_stack(common, 1);
10801   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10802   }
10803 
10804 if (bra == OP_BRAZERO)
10805   BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
10806 
10807 if (bra == OP_BRAMINZERO)
10808   {
10809   /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
10810   JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
10811   if (braminzero != NULL)
10812     {
10813     JUMPHERE(braminzero);
10814     /* We need to release the end pointer to perform the
10815     backtrack for the zero-length iteration. When
10816     framesize is < 0, OP_ONCE will do the release itself. */
10817     if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
10818       {
10819       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10820       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10821       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
10822       }
10823     else if (ket == OP_KETRMIN && opcode != OP_ONCE)
10824       free_stack(common, 1);
10825     }
10826   /* Continue to the normal backtrack. */
10827   }
10828 
10829 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
10830   count_match(common);
10831 
10832 cc += 1 + LINK_SIZE;
10833 
10834 if (opcode == OP_ONCE)
10835   {
10836   /* We temporarily encode the needs_control_head in the lowest bit.
10837      Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
10838      the same value for small signed numbers (including negative numbers). */
10839   BACKTRACK_AS(bracket_backtrack)->u.framesize = (int)((unsigned)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
10840   }
10841 return cc + repeat_length;
10842 }
10843 
/* Generates the matching path of a possessively repeated bracket.

The bracket at cc must be OP_BRAPOS, OP_SBRAPOS, OP_CBRAPOS or OP_SCBRAPOS,
optionally preceded by OP_BRAPOSZERO (in which case zero iterations are
accepted as a match). Every alternative up to the closing OP_KETRPOS is
compiled inside a loop: a successful alternative records the new position
and jumps back to the loop head, a failing alternative falls through to the
next one.

Arguments:
  common   compiler state
  cc       points to OP_BRAPOSZERO or to the bracket opcode itself
  parent   parent backtrack structure (consumed by PUSH_BACKTRACK)

Returns:   the address following the closing OP_KETRPOS, or NULL when
           sljit reports a compiler error.

NOTE(review): PUSH_BACKTRACK is defined outside this function; it presumably
allocates and links `backtrack` and may return early on failure - confirm. */

static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode;
int private_data_ptr;
int cbraprivptr = 0;
BOOL needs_control_head;
int framesize;
int stacksize;
int offset = 0;
BOOL zero = FALSE;
PCRE2_SPTR ccbegin = NULL;
int stack; /* Also contains the offset of control head. */
struct sljit_label *loop = NULL;
struct jump_list *emptymatch = NULL;

PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
if (*cc == OP_BRAPOSZERO)
  {
  zero = TRUE;
  cc++;
  }

opcode = *cc;
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
switch(opcode)
  {
  case OP_BRAPOS:
  case OP_SBRAPOS:
  ccbegin = cc + 1 + LINK_SIZE;
  break;

  case OP_CBRAPOS:
  case OP_SCBRAPOS:
  offset = GET2(cc, 1 + LINK_SIZE);
  /* This case cannot be optimized in the same way as
  normal capturing brackets. */
  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
  cbraprivptr = OVECTOR_PRIV(offset);
  /* From here on offset is the index of the capture pair (2 * group). */
  offset <<= 1;
  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
if (framesize < 0)
  {
  /* No frame is initialised in this branch (init_frame is only called in
  the other one). The stack holds, in order: the saved capture pair (plus
  capture_last when it is tracked) or the starting STR_PTR, then the control
  head when needed, then - unless zero iterations are allowed - a flag which
  stays non-zero until an iteration has matched. */
  if (offset != 0)
    {
    stacksize = 2;
    if (common->capture_last_ptr != 0)
      stacksize++;
    }
  else
    stacksize = 1;

  if (needs_control_head)
    stacksize++;
  if (!zero)
    stacksize++;

  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  allocate_stack(common, stacksize);
  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  stack = 0;
  if (offset != 0)
    {
    stack = 2;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    if (common->capture_last_ptr != 0)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
      stack = 3;
      }
    }
  else
    {
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    stack = 1;
    }

  /* The flag is stored one slot above the control head; afterwards stack
  is left pointing at the control head slot. */
  if (needs_control_head)
    stack++;
  if (!zero)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
  if (needs_control_head)
    {
    stack--;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    }
  }
else
  {
  /* A frame is needed. The stack holds, in order: the "nothing matched
  yet" flag (unless zero iterations are allowed), the control head when
  needed, the starting STR_PTR for non-capturing brackets, the previous
  value of private_data_ptr, and finally the frame itself. */
  stacksize = framesize + 1;
  if (!zero)
    stacksize++;
  if (needs_control_head)
    stacksize++;
  if (offset == 0)
    stacksize++;
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;

  allocate_stack(common, stacksize);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

  stack = 0;
  if (!zero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
    stack = 1;
    }
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    stack++;
    }
  if (offset == 0)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
    stack++;
    }
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
  /* Step back to the control head slot. */
  stack -= 1 + (offset == 0);
  }

if (offset != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);

loop = LABEL();
while (*cc != OP_KETRPOS)
  {
  backtrack->top = NULL;
  backtrack->topbacktracks = NULL;
  cc += GET(cc, 1);

  compile_matchingpath(common, ccbegin, cc, backtrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  /* The alternative matched: record the new position and iterate again. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

    if (offset != 0)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    /* TMP1 holds the position where this iteration started. */
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* At least one iteration has matched: clear the flag. */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
    }
  else
    {
    if (offset != 0)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    /* TMP1 holds the position where this iteration started. */
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* At least one iteration has matched: clear the flag. Here framesize
    is never negative, so the flag is always the first slot. */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    }

  JUMPTO(SLJIT_JUMP, loop);
  flush_stubs(common);

  compile_backtrackingpath(common, backtrack->top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;
  set_jumps(backtrack->topbacktracks, LABEL());

  /* The alternative failed: go back to the last recorded position. */
  if (framesize < 0)
    {
    if (offset != 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
    else
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    }
  else
    {
    if (offset != 0)
      {
      /* Last alternative. */
      if (*cc == OP_KETRPOS)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      }
    }

  if (*cc == OP_KETRPOS)
    break;
  ccbegin = cc + 1 + LINK_SIZE;
  }

/* We don't have to restore the control head in case of a failed match. */

backtrack->topbacktracks = NULL;
if (!zero)
  {
  /* A flag which is still non-zero means that no iteration has matched. */
  if (framesize < 0)
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
  else /* TMP2 is set to [private_data_ptr] above. */
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
  }

/* None of them matched. */
set_jumps(emptymatch, LABEL());
count_match(common);
return cc + 1 + LINK_SIZE;
}
11126 
get_iterator_parameters(compiler_common * common,PCRE2_SPTR cc,PCRE2_UCHAR * opcode,PCRE2_UCHAR * type,sljit_u32 * max,sljit_u32 * exact,PCRE2_SPTR * end)11127 static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
11128 {
11129 int class_len;
11130 
11131 *opcode = *cc;
11132 *exact = 0;
11133 
11134 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
11135   {
11136   cc++;
11137   *type = OP_CHAR;
11138   }
11139 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
11140   {
11141   cc++;
11142   *type = OP_CHARI;
11143   *opcode -= OP_STARI - OP_STAR;
11144   }
11145 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
11146   {
11147   cc++;
11148   *type = OP_NOT;
11149   *opcode -= OP_NOTSTAR - OP_STAR;
11150   }
11151 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
11152   {
11153   cc++;
11154   *type = OP_NOTI;
11155   *opcode -= OP_NOTSTARI - OP_STAR;
11156   }
11157 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
11158   {
11159   cc++;
11160   *opcode -= OP_TYPESTAR - OP_STAR;
11161   *type = OP_END;
11162   }
11163 else
11164   {
11165   SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
11166   *type = *opcode;
11167   cc++;
11168   class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
11169   *opcode = cc[class_len - 1];
11170 
11171   if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
11172     {
11173     *opcode -= OP_CRSTAR - OP_STAR;
11174     *end = cc + class_len;
11175 
11176     if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
11177       {
11178       *exact = 1;
11179       *opcode -= OP_PLUS - OP_STAR;
11180       }
11181     }
11182   else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
11183     {
11184     *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
11185     *end = cc + class_len;
11186 
11187     if (*opcode == OP_POSPLUS)
11188       {
11189       *exact = 1;
11190       *opcode = OP_POSSTAR;
11191       }
11192     }
11193   else
11194     {
11195     SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
11196     *max = GET2(cc, (class_len + IMM2_SIZE));
11197     *exact = GET2(cc, class_len);
11198 
11199     if (*max == 0)
11200       {
11201       if (*opcode == OP_CRPOSRANGE)
11202         *opcode = OP_POSSTAR;
11203       else
11204         *opcode -= OP_CRRANGE - OP_STAR;
11205       }
11206     else
11207       {
11208       *max -= *exact;
11209       if (*max == 0)
11210         *opcode = OP_EXACT;
11211       else if (*max == 1)
11212         {
11213         if (*opcode == OP_CRPOSRANGE)
11214           *opcode = OP_POSQUERY;
11215         else
11216           *opcode -= OP_CRRANGE - OP_QUERY;
11217         }
11218       else
11219         {
11220         if (*opcode == OP_CRPOSRANGE)
11221           *opcode = OP_POSUPTO;
11222         else
11223           *opcode -= OP_CRRANGE - OP_UPTO;
11224         }
11225       }
11226     *end = cc + class_len + 2 * IMM2_SIZE;
11227     }
11228   return cc;
11229   }
11230 
11231 switch(*opcode)
11232   {
11233   case OP_EXACT:
11234   *exact = GET2(cc, 0);
11235   cc += IMM2_SIZE;
11236   break;
11237 
11238   case OP_PLUS:
11239   case OP_MINPLUS:
11240   *exact = 1;
11241   *opcode -= OP_PLUS - OP_STAR;
11242   break;
11243 
11244   case OP_POSPLUS:
11245   *exact = 1;
11246   *opcode = OP_POSSTAR;
11247   break;
11248 
11249   case OP_UPTO:
11250   case OP_MINUPTO:
11251   case OP_POSUPTO:
11252   *max = GET2(cc, 0);
11253   cc += IMM2_SIZE;
11254   break;
11255   }
11256 
11257 if (*type == OP_END)
11258   {
11259   *type = *cc;
11260   *end = next_opcode(common, cc);
11261   cc++;
11262   return cc;
11263   }
11264 
11265 *end = cc + 1;
11266 #ifdef SUPPORT_UNICODE
11267 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
11268 #endif
11269 return cc;
11270 }
11271 
compile_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11272 static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11273 {
11274 DEFINE_COMPILER;
11275 backtrack_common *backtrack;
11276 PCRE2_UCHAR opcode;
11277 PCRE2_UCHAR type;
11278 sljit_u32 max = 0, exact;
11279 sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
11280 sljit_s32 early_fail_type;
11281 BOOL charpos_enabled;
11282 PCRE2_UCHAR charpos_char;
11283 unsigned int charpos_othercasebit;
11284 PCRE2_SPTR end;
11285 jump_list *no_match = NULL;
11286 jump_list *no_char1_match = NULL;
11287 struct sljit_jump *jump = NULL;
11288 struct sljit_label *label;
11289 int private_data_ptr = PRIVATE_DATA(cc);
11290 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
11291 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
11292 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
11293 int tmp_base, tmp_offset;
11294 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11295 BOOL use_tmp;
11296 #endif
11297 
11298 PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
11299 
11300 early_fail_type = (early_fail_ptr & 0x7);
11301 early_fail_ptr >>= 3;
11302 
11303 /* During recursion, these optimizations are disabled. */
11304 if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)
11305   {
11306   early_fail_ptr = 0;
11307   early_fail_type = type_skip;
11308   }
11309 
11310 SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
11311   || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));
11312 
11313 if (early_fail_type == type_fail)
11314   add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
11315 
11316 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
11317 
11318 if (type != OP_EXTUNI)
11319   {
11320   tmp_base = TMP3;
11321   tmp_offset = 0;
11322   }
11323 else
11324   {
11325   tmp_base = SLJIT_MEM1(SLJIT_SP);
11326   tmp_offset = POSSESSIVE0;
11327   }
11328 
11329 /* Handle fixed part first. */
11330 if (exact > 1)
11331   {
11332   SLJIT_ASSERT(early_fail_ptr == 0);
11333 
11334   if (common->mode == PCRE2_JIT_COMPLETE
11335 #ifdef SUPPORT_UNICODE
11336       && !common->utf
11337 #endif
11338       && type != OP_ANYNL && type != OP_EXTUNI)
11339     {
11340     OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
11341     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
11342     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11343     label = LABEL();
11344     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11345     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11346     JUMPTO(SLJIT_NOT_ZERO, label);
11347     }
11348   else
11349     {
11350     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11351     label = LABEL();
11352     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11353     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11354     JUMPTO(SLJIT_NOT_ZERO, label);
11355     }
11356   }
11357 else if (exact == 1)
11358   {
11359   compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11360 
11361   if (early_fail_type == type_fail_range)
11362     {
11363     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
11364     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw));
11365     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
11366     OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
11367     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
11368 
11369     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw), STR_PTR, 0);
11370     }
11371   }
11372 
11373 switch(opcode)
11374   {
11375   case OP_STAR:
11376   case OP_UPTO:
11377   SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR);
11378 
11379   if (type == OP_ANYNL || type == OP_EXTUNI)
11380     {
11381     SLJIT_ASSERT(private_data_ptr == 0);
11382     SLJIT_ASSERT(early_fail_ptr == 0);
11383 
11384     allocate_stack(common, 2);
11385     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11386     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
11387 
11388     if (opcode == OP_UPTO)
11389       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
11390 
11391     label = LABEL();
11392     compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11393     if (opcode == OP_UPTO)
11394       {
11395       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
11396       OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11397       jump = JUMP(SLJIT_ZERO);
11398       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
11399       }
11400 
11401     /* We cannot use TMP3 because of allocate_stack. */
11402     allocate_stack(common, 1);
11403     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11404     JUMPTO(SLJIT_JUMP, label);
11405     if (jump != NULL)
11406       JUMPHERE(jump);
11407     BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11408     break;
11409     }
11410 #ifdef SUPPORT_UNICODE
11411   else if (type == OP_ALLANY && !common->invalid_utf)
11412 #else
11413   else if (type == OP_ALLANY)
11414 #endif
11415     {
11416     if (opcode == OP_STAR)
11417       {
11418       if (private_data_ptr == 0)
11419         allocate_stack(common, 2);
11420 
11421       OP1(SLJIT_MOV, base, offset0, STR_END, 0);
11422       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11423 
11424       OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11425       process_partial_match(common);
11426 
11427       if (early_fail_ptr != 0)
11428         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11429       BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11430       break;
11431       }
11432 #ifdef SUPPORT_UNICODE
11433     else if (!common->utf)
11434 #else
11435     else
11436 #endif
11437       {
11438       if (private_data_ptr == 0)
11439         allocate_stack(common, 2);
11440 
11441       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11442       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11443 
11444       if (common->mode == PCRE2_JIT_COMPLETE)
11445         {
11446         OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
11447         CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11448         }
11449       else
11450         {
11451         jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11452         process_partial_match(common);
11453         JUMPHERE(jump);
11454         }
11455 
11456       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11457 
11458       if (early_fail_ptr != 0)
11459         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11460       BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11461       break;
11462       }
11463     }
11464 
11465   charpos_enabled = FALSE;
11466   charpos_char = 0;
11467   charpos_othercasebit = 0;
11468 
11469   if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
11470     {
11471 #ifdef SUPPORT_UNICODE
11472     charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
11473 #else
11474     charpos_enabled = TRUE;
11475 #endif
11476     if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
11477       {
11478       charpos_othercasebit = char_get_othercase_bit(common, end + 1);
11479       if (charpos_othercasebit == 0)
11480         charpos_enabled = FALSE;
11481       }
11482 
11483     if (charpos_enabled)
11484       {
11485       charpos_char = end[1];
11486       /* Consume the OP_CHAR opcode. */
11487       end += 2;
11488 #if PCRE2_CODE_UNIT_WIDTH == 8
11489       SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
11490 #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
11491       SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
11492       if ((charpos_othercasebit & 0x100) != 0)
11493         charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
11494 #endif
11495       if (charpos_othercasebit != 0)
11496         charpos_char |= charpos_othercasebit;
11497 
11498       BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
11499       BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
11500       BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
11501       }
11502     }
11503 
11504   if (charpos_enabled)
11505     {
11506     if (opcode == OP_UPTO)
11507       OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
11508 
11509     /* Search the first instance of charpos_char. */
11510     jump = JUMP(SLJIT_JUMP);
11511     label = LABEL();
11512     if (opcode == OP_UPTO)
11513       {
11514       OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11515       add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
11516       }
11517     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11518     if (early_fail_ptr != 0)
11519       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11520     JUMPHERE(jump);
11521 
11522     detect_partial_match(common, &backtrack->topbacktracks);
11523     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11524     if (charpos_othercasebit != 0)
11525       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11526     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11527 
11528     if (private_data_ptr == 0)
11529       allocate_stack(common, 2);
11530     OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11531     OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11532 
11533     if (opcode == OP_UPTO)
11534       {
11535       OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11536       add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11537       }
11538 
11539     /* Search the last instance of charpos_char. */
11540     label = LABEL();
11541     compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11542     if (early_fail_ptr != 0)
11543       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11544     detect_partial_match(common, &no_match);
11545     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11546     if (charpos_othercasebit != 0)
11547       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11548 
11549     if (opcode == OP_STAR)
11550       {
11551       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11552       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11553       JUMPTO(SLJIT_JUMP, label);
11554       }
11555     else
11556       {
11557       jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
11558       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11559       JUMPHERE(jump);
11560       OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11561       JUMPTO(SLJIT_NOT_ZERO, label);
11562       }
11563 
11564     set_jumps(no_match, LABEL());
11565     OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1));
11566     OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11567     }
11568   else
11569     {
11570     if (private_data_ptr == 0)
11571       allocate_stack(common, 2);
11572 
11573     OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11574 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11575     use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR);
11576     SLJIT_ASSERT(!use_tmp || tmp_base == TMP3);
11577 
11578     if (common->utf)
11579       OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11580 #endif
11581     if (opcode == OP_UPTO)
11582       OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11583 
11584     detect_partial_match(common, &no_match);
11585     label = LABEL();
11586     compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11587 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11588     if (common->utf)
11589       OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11590 #endif
11591 
11592     if (opcode == OP_UPTO)
11593       {
11594       OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11595       add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11596       }
11597 
11598     detect_partial_match_to(common, label);
11599     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11600 
11601     set_jumps(no_char1_match, LABEL());
11602 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11603     if (common->utf)
11604       {
11605       set_jumps(no_match, LABEL());
11606       if (use_tmp)
11607         {
11608         OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
11609         OP1(SLJIT_MOV, base, offset0, TMP3, 0);
11610         }
11611       else
11612         OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11613       }
11614     else
11615 #endif
11616       {
11617       OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11618       set_jumps(no_match, LABEL());
11619       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11620       }
11621 
11622     if (early_fail_ptr != 0)
11623       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11624     }
11625 
11626   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11627   break;
11628 
11629   case OP_MINSTAR:
11630   if (private_data_ptr == 0)
11631     allocate_stack(common, 1);
11632   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11633   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11634   if (early_fail_ptr != 0)
11635     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11636   break;
11637 
11638   case OP_MINUPTO:
11639   SLJIT_ASSERT(early_fail_ptr == 0);
11640   if (private_data_ptr == 0)
11641     allocate_stack(common, 2);
11642   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11643   OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
11644   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11645   break;
11646 
11647   case OP_QUERY:
11648   case OP_MINQUERY:
11649   SLJIT_ASSERT(early_fail_ptr == 0);
11650   if (private_data_ptr == 0)
11651     allocate_stack(common, 1);
11652   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11653   if (opcode == OP_QUERY)
11654     compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11655   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11656   break;
11657 
11658   case OP_EXACT:
11659   break;
11660 
11661   case OP_POSSTAR:
11662 #if defined SUPPORT_UNICODE
11663   if (type == OP_ALLANY && !common->invalid_utf)
11664 #else
11665   if (type == OP_ALLANY)
11666 #endif
11667     {
11668     OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11669     process_partial_match(common);
11670     if (early_fail_ptr != 0)
11671       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11672     break;
11673     }
11674 
11675 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11676   if (common->utf)
11677     {
11678     OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11679     detect_partial_match(common, &no_match);
11680     label = LABEL();
11681     compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11682     OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11683     detect_partial_match_to(common, label);
11684 
11685     set_jumps(no_match, LABEL());
11686     OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11687     if (early_fail_ptr != 0)
11688       {
11689       if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3)
11690         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0);
11691       else
11692         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11693       }
11694     break;
11695     }
11696 #endif
11697 
11698   detect_partial_match(common, &no_match);
11699   label = LABEL();
11700   compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11701   detect_partial_match_to(common, label);
11702   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11703 
11704   set_jumps(no_char1_match, LABEL());
11705   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11706   set_jumps(no_match, LABEL());
11707   if (early_fail_ptr != 0)
11708     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11709   break;
11710 
11711   case OP_POSUPTO:
11712   SLJIT_ASSERT(early_fail_ptr == 0);
11713 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11714   if (common->utf)
11715     {
11716     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11717     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11718 
11719     detect_partial_match(common, &no_match);
11720     label = LABEL();
11721     compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11722     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11723     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11724     add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11725     detect_partial_match_to(common, label);
11726 
11727     set_jumps(no_match, LABEL());
11728     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
11729     break;
11730     }
11731 #endif
11732 
11733   if (type == OP_ALLANY)
11734     {
11735     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11736 
11737     if (common->mode == PCRE2_JIT_COMPLETE)
11738       {
11739       OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
11740       CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11741       }
11742     else
11743       {
11744       jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11745       process_partial_match(common);
11746       JUMPHERE(jump);
11747       }
11748     break;
11749     }
11750 
11751   OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11752 
11753   detect_partial_match(common, &no_match);
11754   label = LABEL();
11755   compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11756   OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11757   add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11758   detect_partial_match_to(common, label);
11759   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11760 
11761   set_jumps(no_char1_match, LABEL());
11762   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11763   set_jumps(no_match, LABEL());
11764   break;
11765 
11766   case OP_POSQUERY:
11767   SLJIT_ASSERT(early_fail_ptr == 0);
11768   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11769   compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11770   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11771   set_jumps(no_match, LABEL());
11772   OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11773   break;
11774 
11775   default:
11776   SLJIT_UNREACHABLE();
11777   break;
11778   }
11779 
11780 count_match(common);
11781 return end;
11782 }
11783 
compile_fail_accept_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11784 static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11785 {
11786 DEFINE_COMPILER;
11787 backtrack_common *backtrack;
11788 
11789 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11790 
11791 if (*cc == OP_FAIL)
11792   {
11793   add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11794   return cc + 1;
11795   }
11796 
11797 if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
11798   add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
11799 
11800 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
11801   {
11802   /* No need to check notempty conditions. */
11803   if (common->accept_label == NULL)
11804     add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
11805   else
11806     JUMPTO(SLJIT_JUMP, common->accept_label);
11807   return cc + 1;
11808   }
11809 
11810 if (common->accept_label == NULL)
11811   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
11812 else
11813   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
11814 
11815 if (HAS_VIRTUAL_REGISTERS)
11816   {
11817   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11818   OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
11819   }
11820 else
11821   OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));
11822 
11823 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
11824 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO));
11825 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
11826 if (common->accept_label == NULL)
11827   add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
11828 else
11829   JUMPTO(SLJIT_ZERO, common->accept_label);
11830 
11831 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
11832 if (common->accept_label == NULL)
11833   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
11834 else
11835   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
11836 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11837 return cc + 1;
11838 }
11839 
compile_close_matchingpath(compiler_common * common,PCRE2_SPTR cc)11840 static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
11841 {
11842 DEFINE_COMPILER;
11843 int offset = GET2(cc, 1);
11844 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
11845 
11846 /* Data will be discarded anyway... */
11847 if (common->currententry != NULL)
11848   return cc + 1 + IMM2_SIZE;
11849 
11850 if (!optimized_cbracket)
11851   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
11852 offset <<= 1;
11853 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11854 if (!optimized_cbracket)
11855   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11856 return cc + 1 + IMM2_SIZE;
11857 }
11858 
compile_control_verb_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11859 static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11860 {
11861 DEFINE_COMPILER;
11862 backtrack_common *backtrack;
11863 PCRE2_UCHAR opcode = *cc;
11864 PCRE2_SPTR ccend = cc + 1;
11865 
11866 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
11867     opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
11868   ccend += 2 + cc[1];
11869 
11870 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11871 
11872 if (opcode == OP_SKIP)
11873   {
11874   allocate_stack(common, 1);
11875   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11876   return ccend;
11877   }
11878 
11879 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
11880   {
11881   if (HAS_VIRTUAL_REGISTERS)
11882     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11883   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
11884   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
11885   OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
11886   }
11887 
11888 return ccend;
11889 }
11890 
11891 static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
11892 
compile_then_trap_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)11893 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
11894 {
11895 DEFINE_COMPILER;
11896 backtrack_common *backtrack;
11897 BOOL needs_control_head;
11898 int size;
11899 
11900 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
11901 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
11902 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
11903 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
11904 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
11905 
11906 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
11907 size = 3 + (size < 0 ? 0 : size);
11908 
11909 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11910 allocate_stack(common, size);
11911 if (size > 3)
11912   OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
11913 else
11914   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
11915 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
11916 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
11917 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
11918 
11919 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
11920 if (size >= 0)
11921   init_frame(common, cc, ccend, size - 1, 0);
11922 }
11923 
compile_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)11924 static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
11925 {
11926 DEFINE_COMPILER;
11927 backtrack_common *backtrack;
11928 BOOL has_then_trap = FALSE;
11929 then_trap_backtrack *save_then_trap = NULL;
11930 
11931 SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
11932 
11933 if (common->has_then && common->then_offsets[cc - common->start] != 0)
11934   {
11935   SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
11936   has_then_trap = TRUE;
11937   save_then_trap = common->then_trap;
11938   /* Tail item on backtrack. */
11939   compile_then_trap_matchingpath(common, cc, ccend, parent);
11940   }
11941 
11942 while (cc < ccend)
11943   {
11944   switch(*cc)
11945     {
11946     case OP_SOD:
11947     case OP_SOM:
11948     case OP_NOT_WORD_BOUNDARY:
11949     case OP_WORD_BOUNDARY:
11950     case OP_EODN:
11951     case OP_EOD:
11952     case OP_DOLL:
11953     case OP_DOLLM:
11954     case OP_CIRC:
11955     case OP_CIRCM:
11956     case OP_REVERSE:
11957     cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
11958     break;
11959 
11960     case OP_NOT_DIGIT:
11961     case OP_DIGIT:
11962     case OP_NOT_WHITESPACE:
11963     case OP_WHITESPACE:
11964     case OP_NOT_WORDCHAR:
11965     case OP_WORDCHAR:
11966     case OP_ANY:
11967     case OP_ALLANY:
11968     case OP_ANYBYTE:
11969     case OP_NOTPROP:
11970     case OP_PROP:
11971     case OP_ANYNL:
11972     case OP_NOT_HSPACE:
11973     case OP_HSPACE:
11974     case OP_NOT_VSPACE:
11975     case OP_VSPACE:
11976     case OP_EXTUNI:
11977     case OP_NOT:
11978     case OP_NOTI:
11979     cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
11980     break;
11981 
11982     case OP_SET_SOM:
11983     PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
11984     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
11985     allocate_stack(common, 1);
11986     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
11987     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11988     cc++;
11989     break;
11990 
11991     case OP_CHAR:
11992     case OP_CHARI:
11993     if (common->mode == PCRE2_JIT_COMPLETE)
11994       cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
11995     else
11996       cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
11997     break;
11998 
11999     case OP_STAR:
12000     case OP_MINSTAR:
12001     case OP_PLUS:
12002     case OP_MINPLUS:
12003     case OP_QUERY:
12004     case OP_MINQUERY:
12005     case OP_UPTO:
12006     case OP_MINUPTO:
12007     case OP_EXACT:
12008     case OP_POSSTAR:
12009     case OP_POSPLUS:
12010     case OP_POSQUERY:
12011     case OP_POSUPTO:
12012     case OP_STARI:
12013     case OP_MINSTARI:
12014     case OP_PLUSI:
12015     case OP_MINPLUSI:
12016     case OP_QUERYI:
12017     case OP_MINQUERYI:
12018     case OP_UPTOI:
12019     case OP_MINUPTOI:
12020     case OP_EXACTI:
12021     case OP_POSSTARI:
12022     case OP_POSPLUSI:
12023     case OP_POSQUERYI:
12024     case OP_POSUPTOI:
12025     case OP_NOTSTAR:
12026     case OP_NOTMINSTAR:
12027     case OP_NOTPLUS:
12028     case OP_NOTMINPLUS:
12029     case OP_NOTQUERY:
12030     case OP_NOTMINQUERY:
12031     case OP_NOTUPTO:
12032     case OP_NOTMINUPTO:
12033     case OP_NOTEXACT:
12034     case OP_NOTPOSSTAR:
12035     case OP_NOTPOSPLUS:
12036     case OP_NOTPOSQUERY:
12037     case OP_NOTPOSUPTO:
12038     case OP_NOTSTARI:
12039     case OP_NOTMINSTARI:
12040     case OP_NOTPLUSI:
12041     case OP_NOTMINPLUSI:
12042     case OP_NOTQUERYI:
12043     case OP_NOTMINQUERYI:
12044     case OP_NOTUPTOI:
12045     case OP_NOTMINUPTOI:
12046     case OP_NOTEXACTI:
12047     case OP_NOTPOSSTARI:
12048     case OP_NOTPOSPLUSI:
12049     case OP_NOTPOSQUERYI:
12050     case OP_NOTPOSUPTOI:
12051     case OP_TYPESTAR:
12052     case OP_TYPEMINSTAR:
12053     case OP_TYPEPLUS:
12054     case OP_TYPEMINPLUS:
12055     case OP_TYPEQUERY:
12056     case OP_TYPEMINQUERY:
12057     case OP_TYPEUPTO:
12058     case OP_TYPEMINUPTO:
12059     case OP_TYPEEXACT:
12060     case OP_TYPEPOSSTAR:
12061     case OP_TYPEPOSPLUS:
12062     case OP_TYPEPOSQUERY:
12063     case OP_TYPEPOSUPTO:
12064     cc = compile_iterator_matchingpath(common, cc, parent);
12065     break;
12066 
12067     case OP_CLASS:
12068     case OP_NCLASS:
12069     if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
12070       cc = compile_iterator_matchingpath(common, cc, parent);
12071     else
12072       cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
12073     break;
12074 
12075 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
12076     case OP_XCLASS:
12077     if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
12078       cc = compile_iterator_matchingpath(common, cc, parent);
12079     else
12080       cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
12081     break;
12082 #endif
12083 
12084     case OP_REF:
12085     case OP_REFI:
12086     if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
12087       cc = compile_ref_iterator_matchingpath(common, cc, parent);
12088     else
12089       {
12090       compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
12091       cc += 1 + IMM2_SIZE;
12092       }
12093     break;
12094 
12095     case OP_DNREF:
12096     case OP_DNREFI:
12097     if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
12098       cc = compile_ref_iterator_matchingpath(common, cc, parent);
12099     else
12100       {
12101       compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
12102       compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
12103       cc += 1 + 2 * IMM2_SIZE;
12104       }
12105     break;
12106 
12107     case OP_RECURSE:
12108     cc = compile_recurse_matchingpath(common, cc, parent);
12109     break;
12110 
12111     case OP_CALLOUT:
12112     case OP_CALLOUT_STR:
12113     cc = compile_callout_matchingpath(common, cc, parent);
12114     break;
12115 
12116     case OP_ASSERT:
12117     case OP_ASSERT_NOT:
12118     case OP_ASSERTBACK:
12119     case OP_ASSERTBACK_NOT:
12120     PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12121     cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12122     break;
12123 
12124     case OP_BRAMINZERO:
12125     PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
12126     cc = bracketend(cc + 1);
12127     if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
12128       {
12129       allocate_stack(common, 1);
12130       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
12131       }
12132     else
12133       {
12134       allocate_stack(common, 2);
12135       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12136       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
12137       }
12138     BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
12139     count_match(common);
12140     break;
12141 
12142     case OP_ASSERT_NA:
12143     case OP_ASSERTBACK_NA:
12144     case OP_ONCE:
12145     case OP_SCRIPT_RUN:
12146     case OP_BRA:
12147     case OP_CBRA:
12148     case OP_COND:
12149     case OP_SBRA:
12150     case OP_SCBRA:
12151     case OP_SCOND:
12152     cc = compile_bracket_matchingpath(common, cc, parent);
12153     break;
12154 
12155     case OP_BRAZERO:
12156     if (cc[1] > OP_ASSERTBACK_NOT)
12157       cc = compile_bracket_matchingpath(common, cc, parent);
12158     else
12159       {
12160       PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12161       cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12162       }
12163     break;
12164 
12165     case OP_BRAPOS:
12166     case OP_CBRAPOS:
12167     case OP_SBRAPOS:
12168     case OP_SCBRAPOS:
12169     case OP_BRAPOSZERO:
12170     cc = compile_bracketpos_matchingpath(common, cc, parent);
12171     break;
12172 
12173     case OP_MARK:
12174     PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
12175     SLJIT_ASSERT(common->mark_ptr != 0);
12176     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
12177     allocate_stack(common, common->has_skip_arg ? 5 : 1);
12178     if (HAS_VIRTUAL_REGISTERS)
12179       OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12180     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
12181     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
12182     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
12183     OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
12184     if (common->has_skip_arg)
12185       {
12186       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12187       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
12188       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
12189       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
12190       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
12191       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
12192       }
12193     cc += 1 + 2 + cc[1];
12194     break;
12195 
12196     case OP_PRUNE:
12197     case OP_PRUNE_ARG:
12198     case OP_SKIP:
12199     case OP_SKIP_ARG:
12200     case OP_THEN:
12201     case OP_THEN_ARG:
12202     case OP_COMMIT:
12203     case OP_COMMIT_ARG:
12204     cc = compile_control_verb_matchingpath(common, cc, parent);
12205     break;
12206 
12207     case OP_FAIL:
12208     case OP_ACCEPT:
12209     case OP_ASSERT_ACCEPT:
12210     cc = compile_fail_accept_matchingpath(common, cc, parent);
12211     break;
12212 
12213     case OP_CLOSE:
12214     cc = compile_close_matchingpath(common, cc);
12215     break;
12216 
12217     case OP_SKIPZERO:
12218     cc = bracketend(cc + 1);
12219     break;
12220 
12221     default:
12222     SLJIT_UNREACHABLE();
12223     return;
12224     }
12225   if (cc == NULL)
12226     return;
12227   }
12228 
12229 if (has_then_trap)
12230   {
12231   /* Head item on backtrack. */
12232   PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
12233   BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
12234   BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
12235   common->then_trap = save_then_trap;
12236   }
12237 SLJIT_ASSERT(cc == ccend);
12238 }
12239 
/* The backtrack pushing helpers are not available past this point. */
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Compiles the backtracking path of "current" and leaves the enclosing
function (which must return void) as soon as the compiler reports an error.
Requires "common" and "compiler" to be in scope at the point of use. */
#define COMPILE_BACKTRACKINGPATH(current) \
  do \
    { \
    compile_backtrackingpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Views the variable named "current" as a pointer to the given backtrack type. */
#define CURRENT_AS(type) ((type *)current)
12254 
/* Emits the backtracking path of a single character iterator.

   common   compiler state
   current  backtrack record of the iterator; it is accessed as a
            char_iterator_backtrack

The iterator state lives either in two stack slots (when PRIVATE_DATA(cc)
is zero) or in the private data area at that offset. Depending on the
opcode reported by get_iterator_parameters() the emitted code either steps
the saved position and jumps back to the recorded matchingpath label, or
falls through after releasing the stack slots. The possessive and exact
forms emit nothing of their own. Finally the jumps collected in
current->topbacktracks are bound to the end of the emitted code. */
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
char_iterator_backtrack *iter = CURRENT_AS(char_iterator_backtrack);
PCRE2_SPTR cc = current->cc;
PCRE2_UCHAR opcode;
PCRE2_UCHAR type;
sljit_u32 max = 0, exact;
struct sljit_label *retry = NULL;
struct sljit_jump *exhausted = NULL;
jump_list *failed = NULL;
PCRE2_SPTR end;
int private_data_ptr = PRIVATE_DATA(cc);
BOOL on_stack = (private_data_ptr == 0);
int base = on_stack ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
int offset0 = on_stack ? STACK(0) : private_data_ptr;
int offset1 = on_stack ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
/* Number of stack slots to release after the switch (only when on_stack). */
int release = 0;

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

switch(opcode)
  {
  case OP_STAR:
  case OP_UPTO:
  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    /* A saved position is popped; a non-zero value restarts the matching path. */
    SLJIT_ASSERT(private_data_ptr == 0);
    set_jumps(iter->u.backtracks, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iter->matchingpath);
    break;
    }

  if (iter->u.charpos.enabled)
    {
    /* Walk back from the saved position towards the limit kept in offset1,
    resuming the matching path when the preceding character equals the
    recorded one. */
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    OP1(SLJIT_MOV, TMP2, 0, base, offset1);
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

    exhausted = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
    retry = LABEL();
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    if (iter->u.charpos.othercasebit != 0)
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, iter->u.charpos.othercasebit);
    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, iter->u.charpos.chr, iter->matchingpath);
    move_back(common, NULL, TRUE);
    CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, retry);
    }
  else
    {
    /* Step one character back unless the limit kept in offset1 is reached. */
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    exhausted = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
    move_back(common, NULL, TRUE);
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, iter->matchingpath);
    }
  JUMPHERE(exhausted);
  release = 2;
  break;

  case OP_MINSTAR:
  /* Try to match one more character from the saved position. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  compile_char1_matchingpath(common, type, cc, &failed, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  set_jumps(failed, LABEL());
  release = 1;
  break;

  case OP_MINUPTO:
  /* As OP_MINSTAR, but the counter kept in offset1 is decremented first and
  reaching zero counts as a failure. */
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  add_jump(compiler, &failed, JUMP(SLJIT_ZERO));

  OP1(SLJIT_MOV, base, offset1, TMP1, 0);
  compile_char1_matchingpath(common, type, cc, &failed, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);

  set_jumps(failed, LABEL());
  release = 2;
  break;

  case OP_QUERY:
  /* The saved slot is cleared when read, so a second backtrack falls through. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iter->matchingpath);
  exhausted = JUMP(SLJIT_JUMP);
  set_jumps(iter->u.backtracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  JUMPHERE(exhausted);
  release = 1;
  break;

  case OP_MINQUERY:
  /* The character is tried once; a cleared slot means it was tried already. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  exhausted = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  compile_char1_matchingpath(common, type, cc, &failed, TRUE);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  set_jumps(failed, LABEL());
  JUMPHERE(exhausted);
  release = 1;
  break;

  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSQUERY:
  case OP_POSUPTO:
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

if (on_stack && release != 0)
  free_stack(common, release);

set_jumps(current->topbacktracks, LABEL());
}
12382 
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)12383 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12384 {
12385 DEFINE_COMPILER;
12386 PCRE2_SPTR cc = current->cc;
12387 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
12388 PCRE2_UCHAR type;
12389 
12390 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
12391 
12392 if ((type & 0x1) == 0)
12393   {
12394   /* Maximize case. */
12395   set_jumps(current->topbacktracks, LABEL());
12396   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12397   free_stack(common, 1);
12398   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12399   return;
12400   }
12401 
12402 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12403 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12404 set_jumps(current->topbacktracks, LABEL());
12405 free_stack(common, ref ? 2 : 3);
12406 }
12407 
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)12408 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12409 {
12410 DEFINE_COMPILER;
12411 recurse_entry *entry;
12412 
12413 if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
12414   {
12415   entry = CURRENT_AS(recurse_backtrack)->entry;
12416   if (entry->backtrack_label == NULL)
12417     add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
12418   else
12419     JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
12420   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
12421   }
12422 else
12423   compile_backtrackingpath(common, current->top);
12424 
12425 set_jumps(current->topbacktracks, LABEL());
12426 }
12427 
/* Emits the backtracking path of an assertion.

Arguments:
  common      compiler state
  current     backtrack record, viewed here as an assert_backtrack

The item may be prefixed by OP_BRAZERO (never by OP_BRAMINZERO). In that
case the value saved at STACK(0) is loaded into STR_PTR and later decides
whether the matching path is re-entered. When the assertion owns a stack
frame (framesize >= 0), OP_ASSERT / OP_ASSERTBACK additionally revert that
frame and restore the previous private data value. */
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
assert_backtrack *assert_bt = CURRENT_AS(assert_backtrack);
PCRE2_SPTR cc = current->cc;
BOOL has_brazero;
struct sljit_jump *skip_zero_path = NULL;

SLJIT_ASSERT(*cc != OP_BRAMINZERO);
has_brazero = (*cc == OP_BRAZERO);
if (has_brazero)
  {
  cc++;
  SLJIT_ASSERT(current->topbacktracks == NULL);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

if (assert_bt->framesize < 0)
  {
  /* No frame to revert. */
  set_jumps(current->topbacktracks, LABEL());

  if (has_brazero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, assert_bt->matchingpath);
    free_stack(common, 1);
    }
  return;
  }

if (has_brazero)
  {
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, assert_bt->matchingpath);
    free_stack(common, 1);
    return;
    }
  free_stack(common, 1);
  skip_zero_path = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
  {
  /* Revert the frame, then put the value found at STACK(-2) back into
  the private data slot. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert_bt->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert_bt->framesize - 1) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert_bt->private_data_ptr, TMP1, 0);
  }

/* Both kinds of assertion bind their pending backtracks here. */
set_jumps(current->topbacktracks, LABEL());

if (!has_brazero)
  return;

/* We know there is enough place on the stack. */
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
JUMPTO(SLJIT_JUMP, assert_bt->matchingpath);
JUMPHERE(skip_zero_path);
}
12496 
/* Emits the backtracking path of a bracket group.

Arguments:
  common      compiler state
  current     backtrack record, viewed here as a bracket_backtrack

Outline of what the code below does, in order:
  - decodes an optional OP_BRAZERO / OP_BRAMINZERO prefix and the closing
    ket, mapping a counted repeat kept in private data onto OP_KETRMAX or
    OP_KETRMIN;
  - pops the saved repeat counter and, for capturing brackets, the saved
    capture values from the backtrack stack;
  - emits the backtracking code for current->top, then, for every further
    OP_ALT alternative, its matching path followed by its backtracking code;
  - releases the remaining stack slots and either loops back
    (recursive_matchingpath / rmin_label / exact_label), continues on
    zero_matchingpath, or falls through to the caller's next backtrack code.

The function returns early, with partially emitted code, when the sljit
compiler reports an error after compiling an alternative. */
compile_bracket_backtrackingpath(compiler_common * common,struct backtrack_common * current)12497 static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12498 {
12499 DEFINE_COMPILER;
12500 int opcode, stacksize, alt_count, alt_max;
12501 int offset = 0;
12502 int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
12503 int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
12504 PCRE2_SPTR cc = current->cc;
12505 PCRE2_SPTR ccbegin;
12506 PCRE2_SPTR ccprev;
12507 PCRE2_UCHAR bra = OP_BRA;
12508 PCRE2_UCHAR ket;
12509 assert_backtrack *assert;
12510 BOOL has_alternatives;
12511 BOOL needs_control_head = FALSE;
12512 struct sljit_jump *brazero = NULL;
12513 struct sljit_jump *next_alt = NULL;
12514 struct sljit_jump *once = NULL;
12515 struct sljit_jump *cond = NULL;
12516 struct sljit_label *rmin_label = NULL;
12517 struct sljit_label *exact_label = NULL;
12518 struct sljit_put_label *put_label = NULL;
12519 
/* An optional OP_BRAZERO / OP_BRAMINZERO prefix is remembered in bra and skipped. */
12520 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
12521   {
12522   bra = *cc;
12523   cc++;
12524   }
12525 
/* ccbegin temporarily addresses the closing ket of the bracket. A plain
OP_KET with non-zero private data carries a counted repeat (pointer, type
and count); OP_UPTO and OP_MINUPTO are then treated as OP_KETRMAX and
OP_KETRMIN respectively. */
12526 opcode = *cc;
12527 ccbegin = bracketend(cc) - 1 - LINK_SIZE;
12528 ket = *ccbegin;
12529 if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
12530   {
12531   repeat_ptr = PRIVATE_DATA(ccbegin);
12532   repeat_type = PRIVATE_DATA(ccbegin + 2);
12533   repeat_count = PRIVATE_DATA(ccbegin + 3);
12534   SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
12535   if (repeat_type == OP_UPTO)
12536     ket = OP_KETRMAX;
12537   if (repeat_type == OP_MINUPTO)
12538     ket = OP_KETRMIN;
12539   }
/* From here on ccbegin marks the bracket opcode, and cc is advanced by the
link value read at offset 1; an OP_ALT found there means more alternatives
follow. */
12540 ccbegin = cc;
12541 cc += GET(cc, 1);
12542 has_alternatives = *cc == OP_ALT;
12543 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12544   has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
/* For OP_CBRA / OP_SCBRA, offset is twice the GET2 value stored after the
link; it is used below as an OVECTOR index. */
12545 if (opcode == OP_CBRA || opcode == OP_SCBRA)
12546   offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
12547 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
12548   opcode = OP_SCOND;
12549 
12550 alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
12551 
12552 /* Decoding the needs_control_head in framesize. */
12553 if (opcode == OP_ONCE)
12554   {
12555   needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
12556   CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
12557   }
12558 
/* Counted repeat: pop the saved counter from the stack into the repeat
slot (plus one for OP_UPTO). */
12559 if (ket != OP_KET && repeat_type != 0)
12560   {
12561   /* TMP1 is used in OP_KETRMIN below. */
12562   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12563   free_stack(common, 1);
12564   if (repeat_type == OP_UPTO)
12565     OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
12566   else
12567     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
12568   }
12569 
12570 if (ket == OP_KETRMAX)
12571   {
12572   if (bra == OP_BRAZERO)
12573     {
12574     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12575     free_stack(common, 1);
12576     brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12577     }
12578   }
12579 else if (ket == OP_KETRMIN)
12580   {
12581   if (bra != OP_BRAMINZERO)
12582     {
12583     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12584     if (repeat_type != 0)
12585       {
12586       /* TMP1 was set a few lines above. */
12587       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12588       /* Drop STR_PTR for non-greedy plus quantifier. */
12589       if (opcode != OP_ONCE)
12590         free_stack(common, 1);
12591       }
12592     else if (opcode >= OP_SBRA || opcode == OP_ONCE)
12593       {
12594       /* Checking zero-length iteration. */
12595       if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
12596         CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12597       else
12598         {
12599         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12600         CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12601         }
12602       /* Drop STR_PTR for non-greedy plus quantifier. */
12603       if (opcode != OP_ONCE)
12604         free_stack(common, 1);
12605       }
12606     else
12607       JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12608     }
/* rmin_label is the re-entry point used by the OP_KETRMIN tail at the end
of this function. */
12609   rmin_label = LABEL();
12610   if (repeat_type != 0)
12611     OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12612   }
12613 else if (bra == OP_BRAZERO)
12614   {
12615   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12616   free_stack(common, 1);
12617   brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12618   }
12619 else if (repeat_type == OP_EXACT)
12620   {
12621   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12622   exact_label = LABEL();
12623   }
12624 
/* Capturing bracket: restore the values saved on the stack into the
OVECTOR slots (and into capture_last_ptr when that is in use). */
12625 if (offset != 0)
12626   {
12627   if (common->capture_last_ptr != 0)
12628     {
12629     SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
12630     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12631     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12632     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12633     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12634     free_stack(common, 3);
12635     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
12636     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
12637     }
12638   else if (common->optimized_cbracket[offset >> 1] == 0)
12639     {
12640     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12641     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12642     free_stack(common, 2);
12643     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12644     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12645     }
12646   }
12647 
/* Select how the code emitted below is entered: OP_ONCE jumps over it
(the jump is bound near the end of the function); other brackets with
alternatives pop a selector into TMP1 and dispatch on it, either by an
indirect jump (alt_max > 3) or by comparisons (next_alt). */
12648 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
12649   {
12650   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12651     {
12652     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12653     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12654     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
12655     }
12656   once = JUMP(SLJIT_JUMP);
12657   }
12658 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12659   {
12660   if (has_alternatives)
12661     {
12662     /* Always exactly one alternative. */
12663     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12664     free_stack(common, 1);
12665 
12666     alt_max = 2;
12667     next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12668     }
12669   }
12670 else if (has_alternatives)
12671   {
12672   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12673   free_stack(common, 1);
12674 
12675   if (alt_max > 3)
12676     {
12677     sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
12678 
12679     SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_put_label);
12680     sljit_set_put_label(CURRENT_AS(bracket_backtrack)->u.matching_put_label, LABEL());
12681     sljit_emit_op0(compiler, SLJIT_ENDBR);
12682     }
12683   else
12684     next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12685   }
12686 
/* Emit the backtracking code for current->top, then bind any pending
topbacktracks jumps to this point. */
12687 COMPILE_BACKTRACKINGPATH(current->top);
12688 if (current->topbacktracks)
12689   set_jumps(current->topbacktracks, LABEL());
12690 
12691 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12692   {
12693   /* Conditional block always has at most one alternative. */
12694   if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
12695     {
12696     SLJIT_ASSERT(has_alternatives);
12697     assert = CURRENT_AS(bracket_backtrack)->u.assert;
12698     if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
12699       {
12700       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
12701       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12702       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12703       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
12704       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
12705       }
12706     cond = JUMP(SLJIT_JUMP);
12707     set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
12708     }
12709   else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
12710     {
12711     SLJIT_ASSERT(has_alternatives);
12712     cond = JUMP(SLJIT_JUMP);
12713     set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
12714     }
12715   else
12716     SLJIT_ASSERT(!has_alternatives);
12717   }
12718 
/* For each following OP_ALT alternative: compile its matching path, push
the state needed to backtrack into it, jump to alternative_matchingpath,
then emit that alternative's own backtracking code. */
12719 if (has_alternatives)
12720   {
12721   alt_count = 1;
12722   do
12723     {
12724     current->top = NULL;
12725     current->topbacktracks = NULL;
12726     current->nextbacktracks = NULL;
12727     /* Conditional blocks always have an additional alternative, even if it is empty. */
12728     if (*cc == OP_ALT)
12729       {
12730       ccprev = cc + 1 + LINK_SIZE;
12731       cc += GET(cc, 1);
12732       if (opcode != OP_COND && opcode != OP_SCOND)
12733         {
12734         if (opcode != OP_ONCE)
12735           {
12736           if (private_data_ptr != 0)
12737             OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12738           else
12739             OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12740           }
12741         else
12742           OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
12743         }
12744       compile_matchingpath(common, ccprev, cc, current);
12745       if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
12746         return;
12747 
12748       if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
12749         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12750 
12751       if (opcode == OP_SCRIPT_RUN)
12752         match_script_run_common(common, private_data_ptr, current);
12753       }
12754 
12755     /* Instructions after the current alternative is successfully matched. */
12756     /* There is a similar code in compile_bracket_matchingpath. */
12757     if (opcode == OP_ONCE)
12758       match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
12759 
/* First pass: only count the stack slots that will be needed. */
12760     stacksize = 0;
12761     if (repeat_type == OP_MINUPTO)
12762       {
12763       /* We need to preserve the counter. TMP2 will be used below. */
12764       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
12765       stacksize++;
12766       }
12767     if (ket != OP_KET || bra != OP_BRA)
12768       stacksize++;
12769     if (offset != 0)
12770       {
12771       if (common->capture_last_ptr != 0)
12772         stacksize++;
12773       if (common->optimized_cbracket[offset >> 1] == 0)
12774         stacksize += 2;
12775       }
12776     if (opcode != OP_ONCE)
12777       stacksize++;
12778 
12779     if (stacksize > 0)
12780       allocate_stack(common, stacksize);
12781 
/* Second pass: fill the allocated slots, in the same order as counted. */
12782     stacksize = 0;
12783     if (repeat_type == OP_MINUPTO)
12784       {
12785       /* TMP2 was set above. */
12786       OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
12787       stacksize++;
12788       }
12789 
12790     if (ket != OP_KET || bra != OP_BRA)
12791       {
12792       if (ket != OP_KET)
12793         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
12794       else
12795         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
12796       stacksize++;
12797       }
12798 
12799     if (offset != 0)
12800       stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
12801 
/* The last slot identifies this alternative: its index when alt_max <= 3,
otherwise a put_label that is resolved just before this alternative's
backtracking code below. */
12802     if (opcode != OP_ONCE)
12803       {
12804       if (alt_max <= 3)
12805         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
12806       else
12807         put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
12808       }
12809 
12810     if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
12811       {
12812       /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
12813       SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
12814       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
12815       }
12816 
12817     JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
12818 
12819     if (opcode != OP_ONCE)
12820       {
12821       if (alt_max <= 3)
12822         {
12823         JUMPHERE(next_alt);
12824         alt_count++;
12825         if (alt_count < alt_max)
12826           {
12827           SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
12828           next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
12829           }
12830         }
12831       else
12832         {
12833         sljit_set_put_label(put_label, LABEL());
12834         sljit_emit_op0(compiler, SLJIT_ENDBR);
12835         }
12836       }
12837 
12838     COMPILE_BACKTRACKINGPATH(current->top);
12839     if (current->topbacktracks)
12840       set_jumps(current->topbacktracks, LABEL());
12841     SLJIT_ASSERT(!current->nextbacktracks);
12842     }
12843   while (*cc == OP_ALT);
12844 
12845   if (cond != NULL)
12846     {
12847     SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
12848     assert = CURRENT_AS(bracket_backtrack)->u.assert;
12849     if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
12850       {
12851       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
12852       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12853       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12854       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
12855       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
12856       }
12857     JUMPHERE(cond);
12858     }
12859 
12860   /* Free the STR_PTR. */
12861   if (private_data_ptr == 0)
12862     free_stack(common, 1);
12863   }
12864 
/* All alternatives are exhausted: restore the private data slot (or the
OVECTOR pair) saved when the bracket was entered, per bracket kind. */
12865 if (offset != 0)
12866   {
12867   /* Using both tmp register is better for instruction scheduling. */
12868   if (common->optimized_cbracket[offset >> 1] != 0)
12869     {
12870     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12871     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12872     free_stack(common, 2);
12873     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12874     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12875     }
12876   else
12877     {
12878     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12879     free_stack(common, 1);
12880     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12881     }
12882   }
12883 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
12884   {
12885   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
12886   free_stack(common, 1);
12887   }
12888 else if (opcode == OP_ONCE)
12889   {
12890   cc = ccbegin + GET(ccbegin, 1);
12891   stacksize = needs_control_head ? 1 : 0;
12892 
12893   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12894     {
12895     /* Reset head and drop saved frame. */
12896     stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
12897     }
12898   else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
12899     {
12900     /* The STR_PTR must be released. */
12901     stacksize++;
12902     }
12903 
12904   if (stacksize > 0)
12905     free_stack(common, stacksize);
12906 
/* Target of the jump emitted for OP_ONCE before the alternatives. */
12907   JUMPHERE(once);
12908   /* Restore previous private_data_ptr */
12909   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12910     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
12911   else if (ket == OP_KETRMIN)
12912     {
12913     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12914     /* See the comment below. */
12915     free_stack(common, 2);
12916     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12917     }
12918   }
12919 
/* Tail: decide, from the repeat kind, whether to loop back into the group
or to continue on the zero-iteration path. */
12920 if (repeat_type == OP_EXACT)
12921   {
12922   OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12923   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
12924   CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
12925   }
12926 else if (ket == OP_KETRMAX)
12927   {
12928   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12929   if (bra != OP_BRAZERO)
12930     free_stack(common, 1);
12931 
12932   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12933   if (bra == OP_BRAZERO)
12934     {
12935     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12936     JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
12937     JUMPHERE(brazero);
12938     free_stack(common, 1);
12939     }
12940   }
12941 else if (ket == OP_KETRMIN)
12942   {
12943   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12944 
12945   /* OP_ONCE removes everything in case of a backtrack, so we don't
12946   need to explicitly release the STR_PTR. The extra release would
12947   affect badly the free_stack(2) above. */
12948   if (opcode != OP_ONCE)
12949     free_stack(common, 1);
12950   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
12951   if (opcode == OP_ONCE)
12952     free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
12953   else if (bra == OP_BRAMINZERO)
12954     free_stack(common, 1);
12955   }
12956 else if (bra == OP_BRAZERO)
12957   {
12958   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12959   JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
12960   JUMPHERE(brazero);
12961   }
12962 }
12963 
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)12964 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12965 {
12966 DEFINE_COMPILER;
12967 int offset;
12968 struct sljit_jump *jump;
12969 
12970 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
12971   {
12972   if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
12973     {
12974     offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
12975     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12976     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12977     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12978     if (common->capture_last_ptr != 0)
12979       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12980     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12981     if (common->capture_last_ptr != 0)
12982       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12983     }
12984   set_jumps(current->topbacktracks, LABEL());
12985   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
12986   return;
12987   }
12988 
12989 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
12990 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12991 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
12992 
12993 if (current->topbacktracks)
12994   {
12995   jump = JUMP(SLJIT_JUMP);
12996   set_jumps(current->topbacktracks, LABEL());
12997   /* Drop the stack frame. */
12998   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
12999   JUMPHERE(jump);
13000   }
13001 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
13002 }
13003 
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)13004 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13005 {
13006 assert_backtrack backtrack;
13007 
13008 current->top = NULL;
13009 current->topbacktracks = NULL;
13010 current->nextbacktracks = NULL;
13011 if (current->cc[1] > OP_ASSERTBACK_NOT)
13012   {
13013   /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
13014   compile_bracket_matchingpath(common, current->cc, current);
13015   compile_bracket_backtrackingpath(common, current->top);
13016   }
13017 else
13018   {
13019   memset(&backtrack, 0, sizeof(backtrack));
13020   backtrack.common.cc = current->cc;
13021   backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
13022   /* Manual call of compile_assert_matchingpath. */
13023   compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
13024   }
13025 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
13026 }
13027 
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)13028 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13029 {
13030 DEFINE_COMPILER;
13031 PCRE2_UCHAR opcode = *current->cc;
13032 struct sljit_label *loop;
13033 struct sljit_jump *jump;
13034 
13035 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
13036   {
13037   if (common->then_trap != NULL)
13038     {
13039     SLJIT_ASSERT(common->control_head_ptr != 0);
13040 
13041     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13042     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
13043     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
13044     jump = JUMP(SLJIT_JUMP);
13045 
13046     loop = LABEL();
13047     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13048     JUMPHERE(jump);
13049     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
13050     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
13051     add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
13052     return;
13053     }
13054   else if (!common->local_quit_available && common->in_positive_assertion)
13055     {
13056     add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
13057     return;
13058     }
13059   }
13060 
13061 if (common->local_quit_available)
13062   {
13063   /* Abort match with a fail. */
13064   if (common->quit_label == NULL)
13065     add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
13066   else
13067     JUMPTO(SLJIT_JUMP, common->quit_label);
13068   return;
13069   }
13070 
13071 if (opcode == OP_SKIP_ARG)
13072   {
13073   SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13074   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13075   OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
13076   sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
13077 
13078   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
13079   add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
13080   return;
13081   }
13082 
13083 if (opcode == OP_SKIP)
13084   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13085 else
13086   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
13087 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
13088 }
13089 
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)13090 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13091 {
13092 DEFINE_COMPILER;
13093 struct sljit_jump *jump;
13094 int size;
13095 
13096 if (CURRENT_AS(then_trap_backtrack)->then_trap)
13097   {
13098   common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
13099   return;
13100   }
13101 
13102 size = CURRENT_AS(then_trap_backtrack)->framesize;
13103 size = 3 + (size < 0 ? 0 : size);
13104 
13105 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
13106 free_stack(common, size);
13107 jump = JUMP(SLJIT_JUMP);
13108 
13109 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
13110 /* STACK_TOP is set by THEN. */
13111 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
13112   {
13113   add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13114   OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
13115   }
13116 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13117 free_stack(common, 3);
13118 
13119 JUMPHERE(jump);
13120 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
13121 }
13122 
/* Emit the backtracking-path code for a chain of backtrack records.

   common   shared JIT compiler state.
   current  first record to process; the chain is followed through ->prev
            until it reaches NULL.

   For every record, any pending nextbacktracks jumps are first bound to the
   current code position. The opcode at current->cc then selects what is
   emitted: a few opcodes are handled inline, the rest are delegated to a
   compile_*_backtrackingpath helper. common->then_trap is saved on entry and
   restored on exit, because the OP_THEN_TRAP handler may change it while the
   chain is processed. */
compile_backtrackingpath(compiler_common * common,struct backtrack_common * current)13123 static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13124 {
13125 DEFINE_COMPILER;
13126 then_trap_backtrack *save_then_trap = common->then_trap;
13127
13128 while (current)
13129   {
        /* Jumps waiting for "the next backtrack" land at the start of this record. */
13130   if (current->nextbacktracks != NULL)
13131     set_jumps(current->nextbacktracks, LABEL());
13132   switch(*current->cc)
13133     {
13134     case OP_SET_SOM:
          /* Pop one saved word and write it back to OVECTOR(0). */
13135     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13136     free_stack(common, 1);
13137     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
13138     break;
13139
          /* Repeat and class opcodes all share the iterator backtracking helper. */
13140     case OP_STAR:
13141     case OP_MINSTAR:
13142     case OP_PLUS:
13143     case OP_MINPLUS:
13144     case OP_QUERY:
13145     case OP_MINQUERY:
13146     case OP_UPTO:
13147     case OP_MINUPTO:
13148     case OP_EXACT:
13149     case OP_POSSTAR:
13150     case OP_POSPLUS:
13151     case OP_POSQUERY:
13152     case OP_POSUPTO:
13153     case OP_STARI:
13154     case OP_MINSTARI:
13155     case OP_PLUSI:
13156     case OP_MINPLUSI:
13157     case OP_QUERYI:
13158     case OP_MINQUERYI:
13159     case OP_UPTOI:
13160     case OP_MINUPTOI:
13161     case OP_EXACTI:
13162     case OP_POSSTARI:
13163     case OP_POSPLUSI:
13164     case OP_POSQUERYI:
13165     case OP_POSUPTOI:
13166     case OP_NOTSTAR:
13167     case OP_NOTMINSTAR:
13168     case OP_NOTPLUS:
13169     case OP_NOTMINPLUS:
13170     case OP_NOTQUERY:
13171     case OP_NOTMINQUERY:
13172     case OP_NOTUPTO:
13173     case OP_NOTMINUPTO:
13174     case OP_NOTEXACT:
13175     case OP_NOTPOSSTAR:
13176     case OP_NOTPOSPLUS:
13177     case OP_NOTPOSQUERY:
13178     case OP_NOTPOSUPTO:
13179     case OP_NOTSTARI:
13180     case OP_NOTMINSTARI:
13181     case OP_NOTPLUSI:
13182     case OP_NOTMINPLUSI:
13183     case OP_NOTQUERYI:
13184     case OP_NOTMINQUERYI:
13185     case OP_NOTUPTOI:
13186     case OP_NOTMINUPTOI:
13187     case OP_NOTEXACTI:
13188     case OP_NOTPOSSTARI:
13189     case OP_NOTPOSPLUSI:
13190     case OP_NOTPOSQUERYI:
13191     case OP_NOTPOSUPTOI:
13192     case OP_TYPESTAR:
13193     case OP_TYPEMINSTAR:
13194     case OP_TYPEPLUS:
13195     case OP_TYPEMINPLUS:
13196     case OP_TYPEQUERY:
13197     case OP_TYPEMINQUERY:
13198     case OP_TYPEUPTO:
13199     case OP_TYPEMINUPTO:
13200     case OP_TYPEEXACT:
13201     case OP_TYPEPOSSTAR:
13202     case OP_TYPEPOSPLUS:
13203     case OP_TYPEPOSQUERY:
13204     case OP_TYPEPOSUPTO:
13205     case OP_CLASS:
13206     case OP_NCLASS:
13207 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
13208     case OP_XCLASS:
13209 #endif
13210     compile_iterator_backtrackingpath(common, current);
13211     break;
13212
13213     case OP_REF:
13214     case OP_REFI:
13215     case OP_DNREF:
13216     case OP_DNREFI:
13217     compile_ref_iterator_backtrackingpath(common, current);
13218     break;
13219
13220     case OP_RECURSE:
13221     compile_recurse_backtrackingpath(common, current);
13222     break;
13223
13224     case OP_ASSERT:
13225     case OP_ASSERT_NOT:
13226     case OP_ASSERTBACK:
13227     case OP_ASSERTBACK_NOT:
13228     compile_assert_backtrackingpath(common, current);
13229     break;
13230
13231     case OP_ASSERT_NA:
13232     case OP_ASSERTBACK_NA:
13233     case OP_ONCE:
13234     case OP_SCRIPT_RUN:
13235     case OP_BRA:
13236     case OP_CBRA:
13237     case OP_COND:
13238     case OP_SBRA:
13239     case OP_SCBRA:
13240     case OP_SCOND:
13241     compile_bracket_backtrackingpath(common, current);
13242     break;
13243
13244     case OP_BRAZERO:
          /* The opcode following BRAZERO decides the handler: values above
             OP_ASSERTBACK_NOT go to the bracket helper, the rest to the
             assertion helper. */
13245     if (current->cc[1] > OP_ASSERTBACK_NOT)
13246       compile_bracket_backtrackingpath(common, current);
13247     else
13248       compile_assert_backtrackingpath(common, current);
13249     break;
13250
13251     case OP_BRAPOS:
13252     case OP_CBRAPOS:
13253     case OP_SBRAPOS:
13254     case OP_SCBRAPOS:
13255     case OP_BRAPOSZERO:
13256     compile_bracketpos_backtrackingpath(common, current);
13257     break;
13258
13259     case OP_BRAMINZERO:
13260     compile_braminzero_backtrackingpath(common, current);
13261     break;
13262
13263     case OP_MARK:
          /* Write the saved word back to mark_ptr. Without has_skip_arg one
             slot is popped and the word is at STACK(0). With has_skip_arg five
             slots are popped: the word for mark_ptr is at STACK(4) and the
             word at STACK(0) is written back to control_head_ptr. */
13264     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));
13265     if (common->has_skip_arg)
13266       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13267     free_stack(common, common->has_skip_arg ? 5 : 1);
13268     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
13269     if (common->has_skip_arg)
13270       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
13271     break;
13272
13273     case OP_THEN:
13274     case OP_THEN_ARG:
13275     case OP_PRUNE:
13276     case OP_PRUNE_ARG:
13277     case OP_SKIP:
13278     case OP_SKIP_ARG:
13279     compile_control_verb_backtrackingpath(common, current);
13280     break;
13281
13282     case OP_COMMIT:
13283     case OP_COMMIT_ARG:
          /* Backtracking onto COMMIT leaves through the quit path. The return
             register is set to PCRE2_ERROR_NOMATCH only when no local quit is
             available. If quit_label is not defined yet, the jump is queued on
             the common->quit list instead. */
13284     if (!common->local_quit_available)
13285       OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13286     if (common->quit_label == NULL)
13287       add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
13288     else
13289       JUMPTO(SLJIT_JUMP, common->quit_label);
13290     break;
13291
13292     case OP_CALLOUT:
13293     case OP_CALLOUT_STR:
13294     case OP_FAIL:
13295     case OP_ACCEPT:
13296     case OP_ASSERT_ACCEPT:
          /* No code of their own: their failure jumps are bound here, so
             execution continues with whatever is emitted for the next record. */
13297     set_jumps(current->topbacktracks, LABEL());
13298     break;
13299
13300     case OP_THEN_TRAP:
13301     /* A virtual opcode for then traps. */
13302     compile_then_trap_backtrackingpath(common, current);
13303     break;
13304
13305     default:
13306     SLJIT_UNREACHABLE();
13307     break;
13308     }
13309   current = current->prev;
13310   }
      /* Undo any then_trap change made while walking the chain. */
13311 common->then_trap = save_then_trap;
13312 }
13313 
/* Emit the stand-alone code for the recursion target described by
   common->currententry.

   common   shared JIT compiler state; currententry selects the group.

   Pieces generated, in emission order:
     - entry_label: matching-path entry. It saves the return address and the
       group's private data on the stack, then runs the alternatives one
       after another (a failed alternative falls into the next one).
     - backtrack_label: re-entry point for backtracking into the recursion,
       emitted once, right after the first alternative's matching path.
     - the "no alternative matched" exit, which returns with TMP1 = 0.
     - optional handlers for the quit jump list and for accept.
     - the "matched" exit, which returns with TMP1 = 1.

   The function returns early, leaving the code incomplete, when sljit
   reports a compiler error after a matching or backtracking path has been
   compiled. */
compile_recurse(compiler_common * common)13314 static SLJIT_INLINE void compile_recurse(compiler_common *common)
13315 {
13316 DEFINE_COMPILER;
13317 PCRE2_SPTR cc = common->start + common->currententry->start;
13318 PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
13319 PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
13320 BOOL needs_control_head;
13321 BOOL has_quit;
13322 BOOL has_accept;
13323 int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head, &has_quit, &has_accept);
13324 int alt_count, alt_max, local_size;
13325 backtrack_common altbacktrack;
13326 jump_list *match = NULL;
13327 struct sljit_jump *next_alt = NULL;
13328 struct sljit_jump *accept_exit = NULL;
13329 struct sljit_label *quit;
13330 struct sljit_put_label *put_label = NULL;
13331
13332 /* Recurse captures then: start with no active then-trap. */
13333 common->then_trap = NULL;
13334
13335 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
13336
13337 alt_max = no_alternatives(cc);
13338 alt_count = 0;
13339
13340 /* Matching path. */
13341 SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
13342 common->currententry->entry_label = LABEL();
13343 set_jumps(common->currententry->entry_calls, common->currententry->entry_label);
13344
      /* Fast-call entry: the return address is received in TMP2. */
13345 sljit_emit_fast_enter(compiler, TMP2, 0);
13346 count_match(common);
13347
      /* One local slot for the return address; with several alternatives a
         second slot keeps the subject pointer the recursion was entered with. */
13348 local_size = (alt_max > 1) ? 2 : 1;
13349
13350 /* (Reversed) stack layout:
13351    [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */
13352
13353 allocate_stack(common, private_data_size + local_size);
13354 /* Save return address. */
13355 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);
13356
13357 copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, has_quit);
13358
13359 /* This variable is saved and restored every time we enter or exit a recursive context. */
13360 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
13361
13362 if (needs_control_head)
13363   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13364
      /* Remember the starting subject position so later alternatives can restart from it. */
13365 if (alt_max > 1)
13366   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
13367
      /* quit/accept labels and jump lists are reset for this recursion body. */
13368 memset(&altbacktrack, 0, sizeof(backtrack_common));
13369 common->quit_label = NULL;
13370 common->accept_label = NULL;
13371 common->quit = NULL;
13372 common->accept = NULL;
13373 altbacktrack.cc = ccbegin;
13374 cc += GET(cc, 1);
      /* One iteration per alternative; cc points at the end of the current one. */
13375 while (1)
13376   {
13377   altbacktrack.top = NULL;
13378   altbacktrack.topbacktracks = NULL;
13379
        /* Every alternative after the first restarts from the saved subject position. */
13380   if (altbacktrack.cc != ccbegin)
13381     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13382
13383   compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
13384   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13385     return;
13386
        /* The alternative matched. Push one or two words: STACK(0) is filled at
           the shared `match` exit with TMP2 (loaded here from
           recursive_head_ptr). STACK(1), when present, identifies this
           alternative for later backtracking. */
13387   allocate_stack(common, (alt_max > 1 || has_accept) ? 2 : 1);
13388   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13389
13390   if (alt_max > 1 || has_accept)
13391     {
          /* With more than three alternatives the address of the alternative's
             backtracking code is stored (resolved later through put_label);
             otherwise its index is stored. */
13392     if (alt_max > 3)
13393       put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
13394     else
13395       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
13396     }
13397
13398   add_jump(compiler, &match, JUMP(SLJIT_JUMP));
13399
13400   if (alt_count == 0)
13401     {
13402     /* Backtracking path entry. */
13403     SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
13404     common->currententry->backtrack_label = LABEL();
13405     set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);
13406
          /* Fast-call entry: the return address is received in TMP1. */
13407     sljit_emit_fast_enter(compiler, TMP1, 0);
13408
          /* A -1 in STACK(1) is the marker written by the accept handler below. */
13409     if (has_accept)
13410       accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
13411
          /* STACK(0) holds the word stored at the `match` exit; it is used as
             the base address for saving the new return address. */
13412     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13413     /* Save return address. */
13414     OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);
13415
13416     copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
13417
13418     if (alt_max > 1)
13419       {
            /* Dispatch to the backtracking code of the alternative that matched. */
13420       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13421       free_stack(common, 2);
13422
13423       if (alt_max > 3)
13424         {
              /* Indirect jump to the stored address. The first alternative's
                 own target is right here. */
13425         sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
13426         sljit_set_put_label(put_label, LABEL());
13427         sljit_emit_op0(compiler, SLJIT_ENDBR);
13428         }
13429       else
              /* Index dispatch: fall through for index 0, otherwise skip ahead. */
13430         next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13431       }
13432     else
13433       free_stack(common, has_accept ? 2 : 1);
13434     }
13435   else if (alt_max > 3)
13436     {
          /* Target of the indirect jump for this alternative. */
13437     sljit_set_put_label(put_label, LABEL());
13438     sljit_emit_op0(compiler, SLJIT_ENDBR);
13439     }
13440   else
13441     {
          /* Index dispatch continues here. Another compare is needed only for
             the middle one of three alternatives. */
13442     JUMPHERE(next_alt);
13443     if (alt_count + 1 < alt_max)
13444       {
13445       SLJIT_ASSERT(alt_count == 1 && alt_max == 3);
13446       next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
13447       }
13448     }
13449
13450   alt_count++;
13451
13452   compile_backtrackingpath(common, altbacktrack.top);
13453   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13454     return;
        /* Failure of this alternative continues with the code emitted next. */
13455   set_jumps(altbacktrack.topbacktracks, LABEL());
13456
13457   if (*cc != OP_ALT)
13458     break;
13459
13460   altbacktrack.cc = cc + 1 + LINK_SIZE;
13461   cc += GET(cc, 1);
13462   }
13463
13464 /* No alternative is matched. */
13465
13466 quit = LABEL();
13467
13468 copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, has_quit);
13469
      /* Fetch the return address, drop the whole frame and return with TMP1 = 0. */
13470 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
13471 free_stack(common, private_data_size + local_size);
13472 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
13473 OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13474
      /* Target of the jumps queued on common->quit inside the body: STACK_TOP
         is reloaded from recursive_head_ptr, the shared data is copied back,
         and control joins the `quit` exit above. */
13475 if (common->quit != NULL)
13476   {
13477   SLJIT_ASSERT(has_quit);
13478
13479   set_jumps(common->quit, LABEL());
13480   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13481   copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, has_quit);
13482   JUMPTO(SLJIT_JUMP, quit);
13483   }
13484
      /* Backtracking into a recursion that carries the -1 marker: drop the two
         marker words, store the return address received in TMP1 at the
         backtrack entry, and return with TMP1 = 0. */
13485 if (has_accept)
13486   {
13487   JUMPHERE(accept_exit);
13488   free_stack(common, 2);
13489
13490   /* Save return address. */
13491   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);
13492
13493   copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, has_quit);
13494
13495   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
13496   free_stack(common, private_data_size + local_size);
13497   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
13498   OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13499   }
13500
      /* Target of the jumps queued on common->accept inside the body:
         STACK_TOP is reloaded from recursive_head_ptr, two words are pushed
         with -1 in STACK(1) as the marker, and control falls into the `match`
         exit below. */
13501 if (common->accept != NULL)
13502   {
13503   SLJIT_ASSERT(has_accept);
13504
13505   set_jumps(common->accept, LABEL());
13506
13507   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13508   OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
13509
13510   allocate_stack(common, 2);
13511   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
13512   }
13513
      /* Shared "matched" exit: store TMP2 in STACK(0), swap the private data
         with the global state, load the return address through TMP2 and
         return with TMP1 = 1. */
13514 set_jumps(match, LABEL());
13515
13516 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
13517
13518 copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
13519
13520 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
13521 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
13522 OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13523 }
13524 
/* These helper macros are not available past this point. */
13525 #undef COMPILE_BACKTRACKINGPATH
13526 #undef CURRENT_AS
13527
/* Bits of the jit_compile() `mode` argument that are configuration flags
   rather than a compile mode. jit_compile() reads them and then masks them
   out of `mode`. */
13528 #define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS \
13529   (PCRE2_JIT_INVALID_UTF)
13530 
jit_compile(pcre2_code * code,sljit_u32 mode)13531 static int jit_compile(pcre2_code *code, sljit_u32 mode)
13532 {
13533 pcre2_real_code *re = (pcre2_real_code *)code;
13534 struct sljit_compiler *compiler;
13535 backtrack_common rootbacktrack;
13536 compiler_common common_data;
13537 compiler_common *common = &common_data;
13538 const sljit_u8 *tables = re->tables;
13539 void *allocator_data = &re->memctl;
13540 int private_data_size;
13541 PCRE2_SPTR ccend;
13542 executable_functions *functions;
13543 void *executable_func;
13544 sljit_uw executable_size;
13545 sljit_uw total_length;
13546 struct sljit_label *mainloop_label = NULL;
13547 struct sljit_label *continue_match_label;
13548 struct sljit_label *empty_match_found_label = NULL;
13549 struct sljit_label *empty_match_backtrack_label = NULL;
13550 struct sljit_label *reset_match_label;
13551 struct sljit_label *quit_label;
13552 struct sljit_jump *jump;
13553 struct sljit_jump *minlength_check_failed = NULL;
13554 struct sljit_jump *empty_match = NULL;
13555 struct sljit_jump *end_anchor_failed = NULL;
13556 jump_list *reqcu_not_found = NULL;
13557 
13558 SLJIT_ASSERT(tables);
13559 
13560 #if HAS_VIRTUAL_REGISTERS == 1
13561 SLJIT_ASSERT(sljit_get_register_index(TMP3) < 0 && sljit_get_register_index(ARGUMENTS) < 0 && sljit_get_register_index(RETURN_ADDR) < 0);
13562 #elif HAS_VIRTUAL_REGISTERS == 0
13563 SLJIT_ASSERT(sljit_get_register_index(TMP3) >= 0 && sljit_get_register_index(ARGUMENTS) >= 0 && sljit_get_register_index(RETURN_ADDR) >= 0);
13564 #else
13565 #error "Invalid value for HAS_VIRTUAL_REGISTERS"
13566 #endif
13567 
13568 memset(&rootbacktrack, 0, sizeof(backtrack_common));
13569 memset(common, 0, sizeof(compiler_common));
13570 common->re = re;
13571 common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
13572 rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
13573 
13574 #ifdef SUPPORT_UNICODE
13575 common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
13576 #endif /* SUPPORT_UNICODE */
13577 mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;
13578 
13579 common->start = rootbacktrack.cc;
13580 common->read_only_data_head = NULL;
13581 common->fcc = tables + fcc_offset;
13582 common->lcc = (sljit_sw)(tables + lcc_offset);
13583 common->mode = mode;
13584 common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
13585 common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
13586 common->nltype = NLTYPE_FIXED;
13587 switch(re->newline_convention)
13588   {
13589   case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
13590   case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
13591   case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
13592   case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
13593   case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
13594   case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
13595   default: return PCRE2_ERROR_INTERNAL;
13596   }
13597 common->nlmax = READ_CHAR_MAX;
13598 common->nlmin = 0;
13599 if (re->bsr_convention == PCRE2_BSR_UNICODE)
13600   common->bsr_nltype = NLTYPE_ANY;
13601 else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
13602   common->bsr_nltype = NLTYPE_ANYCRLF;
13603 else
13604   {
13605 #ifdef BSR_ANYCRLF
13606   common->bsr_nltype = NLTYPE_ANYCRLF;
13607 #else
13608   common->bsr_nltype = NLTYPE_ANY;
13609 #endif
13610   }
13611 common->bsr_nlmax = READ_CHAR_MAX;
13612 common->bsr_nlmin = 0;
13613 common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
13614 common->ctypes = (sljit_sw)(tables + ctypes_offset);
13615 common->name_count = re->name_count;
13616 common->name_entry_size = re->name_entry_size;
13617 common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
13618 common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
13619 #ifdef SUPPORT_UNICODE
13620 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
13621 common->utf = (re->overall_options & PCRE2_UTF) != 0;
13622 common->ucp = (re->overall_options & PCRE2_UCP) != 0;
13623 if (common->utf)
13624   {
13625   if (common->nltype == NLTYPE_ANY)
13626     common->nlmax = 0x2029;
13627   else if (common->nltype == NLTYPE_ANYCRLF)
13628     common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13629   else
13630     {
13631     /* We only care about the first newline character. */
13632     common->nlmax = common->newline & 0xff;
13633     }
13634 
13635   if (common->nltype == NLTYPE_FIXED)
13636     common->nlmin = common->newline & 0xff;
13637   else
13638     common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13639 
13640   if (common->bsr_nltype == NLTYPE_ANY)
13641     common->bsr_nlmax = 0x2029;
13642   else
13643     common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13644   common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13645   }
13646 else
13647   common->invalid_utf = FALSE;
13648 #endif /* SUPPORT_UNICODE */
13649 ccend = bracketend(common->start);
13650 
13651 /* Calculate the local space size on the stack. */
13652 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
13653 common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
13654 if (!common->optimized_cbracket)
13655   return PCRE2_ERROR_NOMEMORY;
13656 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
13657 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13658 #else
13659 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
13660 #endif
13661 
13662 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
13663 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
13664 common->capture_last_ptr = common->ovector_start;
13665 common->ovector_start += sizeof(sljit_sw);
13666 #endif
13667 if (!check_opcode_types(common, common->start, ccend))
13668   {
13669   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13670   return PCRE2_ERROR_NOMEMORY;
13671   }
13672 
13673 /* Checking flags and updating ovector_start. */
13674 if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13675   {
13676   common->req_char_ptr = common->ovector_start;
13677   common->ovector_start += sizeof(sljit_sw);
13678   }
13679 if (mode != PCRE2_JIT_COMPLETE)
13680   {
13681   common->start_used_ptr = common->ovector_start;
13682   common->ovector_start += sizeof(sljit_sw);
13683   if (mode == PCRE2_JIT_PARTIAL_SOFT)
13684     {
13685     common->hit_start = common->ovector_start;
13686     common->ovector_start += sizeof(sljit_sw);
13687     }
13688   }
13689 if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
13690   {
13691   common->match_end_ptr = common->ovector_start;
13692   common->ovector_start += sizeof(sljit_sw);
13693   }
13694 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
13695 common->control_head_ptr = 1;
13696 #endif
13697 if (common->control_head_ptr != 0)
13698   {
13699   common->control_head_ptr = common->ovector_start;
13700   common->ovector_start += sizeof(sljit_sw);
13701   }
13702 if (common->has_set_som)
13703   {
13704   /* Saving the real start pointer is necessary. */
13705   common->start_ptr = common->ovector_start;
13706   common->ovector_start += sizeof(sljit_sw);
13707   }
13708 
13709 /* Aligning ovector to even number of sljit words. */
13710 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
13711   common->ovector_start += sizeof(sljit_sw);
13712 
13713 if (common->start_ptr == 0)
13714   common->start_ptr = OVECTOR(0);
13715 
13716 /* Capturing brackets cannot be optimized if callouts are allowed. */
13717 if (common->capture_last_ptr != 0)
13718   memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13719 
13720 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
13721 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
13722 
13723 total_length = ccend - common->start;
13724 common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
13725 if (!common->private_data_ptrs)
13726   {
13727   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13728   return PCRE2_ERROR_NOMEMORY;
13729   }
13730 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
13731 
13732 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
13733 
13734 if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
13735   detect_early_fail(common, common->start, &private_data_size, 0, 0, TRUE);
13736 
13737 set_private_data_ptrs(common, &private_data_size, ccend);
13738 
13739 SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
13740 
13741 if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
13742   {
13743   SLJIT_FREE(common->private_data_ptrs, allocator_data);
13744   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13745   return PCRE2_ERROR_NOMEMORY;
13746   }
13747 
13748 if (common->has_then)
13749   {
13750   common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
13751   memset(common->then_offsets, 0, total_length);
13752   set_then_offsets(common, common->start, NULL);
13753   }
13754 
13755 compiler = sljit_create_compiler(allocator_data, NULL);
13756 if (!compiler)
13757   {
13758   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13759   SLJIT_FREE(common->private_data_ptrs, allocator_data);
13760   return PCRE2_ERROR_NOMEMORY;
13761   }
13762 common->compiler = compiler;
13763 
13764 /* Main pcre_jit_exec entry. */
13765 SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0);
13766 sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size);
13767 
13768 /* Register init. */
13769 reset_ovector(common, (re->top_bracket + 1) * 2);
13770 if (common->req_char_ptr != 0)
13771   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
13772 
13773 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
13774 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
13775 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13776 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
13777 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
13778 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
13779 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
13780 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
13781 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
13782 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
13783 
13784 if (common->early_fail_start_ptr < common->early_fail_end_ptr)
13785   reset_early_fail(common);
13786 
13787 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13788   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13789 if (common->mark_ptr != 0)
13790   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
13791 if (common->control_head_ptr != 0)
13792   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13793 
13794 /* Main part of the matching */
13795 if ((re->overall_options & PCRE2_ANCHORED) == 0)
13796   {
13797   mainloop_label = mainloop_entry(common);
13798   continue_match_label = LABEL();
13799   /* Forward search if possible. */
13800   if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13801     {
13802     if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
13803       ;
13804     else if ((re->flags & PCRE2_FIRSTSET) != 0)
13805       fast_forward_first_char(common);
13806     else if ((re->flags & PCRE2_STARTLINE) != 0)
13807       fast_forward_newline(common);
13808     else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
13809       fast_forward_start_bits(common);
13810     }
13811   }
13812 else
13813   continue_match_label = LABEL();
13814 
13815 if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13816   {
13817   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13818   OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
13819   minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
13820   }
13821 if (common->req_char_ptr != 0)
13822   reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
13823 
13824 /* Store the current STR_PTR in OVECTOR(0). */
13825 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
13826 /* Copy the limit of allowed recursions. */
13827 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
13828 if (common->capture_last_ptr != 0)
13829   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
13830 if (common->fast_forward_bc_ptr != NULL)
13831   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);
13832 
13833 if (common->start_ptr != OVECTOR(0))
13834   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
13835 
13836 /* Copy the beginning of the string. */
13837 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13838   {
13839   jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13840   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
13841   JUMPHERE(jump);
13842   }
13843 else if (mode == PCRE2_JIT_PARTIAL_HARD)
13844   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
13845 
13846 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
13847 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13848   {
13849   sljit_free_compiler(compiler);
13850   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13851   SLJIT_FREE(common->private_data_ptrs, allocator_data);
13852   PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13853   return PCRE2_ERROR_NOMEMORY;
13854   }
13855 
13856 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
13857   end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
13858 
13859 if (common->might_be_empty)
13860   {
13861   empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
13862   empty_match_found_label = LABEL();
13863   }
13864 
13865 common->accept_label = LABEL();
13866 if (common->accept != NULL)
13867   set_jumps(common->accept, common->accept_label);
13868 
13869 /* This means we have a match. Update the ovector. */
13870 copy_ovector(common, re->top_bracket + 1);
13871 common->quit_label = common->abort_label = LABEL();
13872 if (common->quit != NULL)
13873   set_jumps(common->quit, common->quit_label);
13874 if (common->abort != NULL)
13875   set_jumps(common->abort, common->abort_label);
13876 if (minlength_check_failed != NULL)
13877   SET_LABEL(minlength_check_failed, common->abort_label);
13878 
13879 sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
13880 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
13881 
13882 if (common->failed_match != NULL)
13883   {
13884   SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
13885   set_jumps(common->failed_match, LABEL());
13886   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13887   JUMPTO(SLJIT_JUMP, common->abort_label);
13888   }
13889 
13890 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
13891   JUMPHERE(end_anchor_failed);
13892 
13893 if (mode != PCRE2_JIT_COMPLETE)
13894   {
13895   common->partialmatchlabel = LABEL();
13896   set_jumps(common->partialmatch, common->partialmatchlabel);
13897   return_with_partial_match(common, common->quit_label);
13898   }
13899 
13900 if (common->might_be_empty)
13901   empty_match_backtrack_label = LABEL();
13902 compile_backtrackingpath(common, rootbacktrack.top);
13903 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13904   {
13905   sljit_free_compiler(compiler);
13906   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13907   SLJIT_FREE(common->private_data_ptrs, allocator_data);
13908   PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13909   return PCRE2_ERROR_NOMEMORY;
13910   }
13911 
13912 SLJIT_ASSERT(rootbacktrack.prev == NULL);
13913 reset_match_label = LABEL();
13914 
13915 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13916   {
13917   /* Update hit_start only in the first time. */
13918   jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
13919   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
13920   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
13921   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
13922   JUMPHERE(jump);
13923   }
13924 
13925 /* Check we have remaining characters. */
13926 if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
13927   {
13928   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
13929   }
13930 
13931 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
13932     (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);
13933 
13934 if ((re->overall_options & PCRE2_ANCHORED) == 0)
13935   {
13936   if (common->ff_newline_shortcut != NULL)
13937     {
13938     /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
13939     if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
13940       {
13941       if (common->match_end_ptr != 0)
13942         {
13943         OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
13944         OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
13945         CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
13946         OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
13947         }
13948       else
13949         CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
13950       }
13951     }
13952   else
13953     CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
13954   }
13955 
13956 /* No more remaining characters. */
13957 if (reqcu_not_found != NULL)
13958   set_jumps(reqcu_not_found, LABEL());
13959 
13960 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13961   CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
13962 
13963 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13964 JUMPTO(SLJIT_JUMP, common->quit_label);
13965 
13966 flush_stubs(common);
13967 
13968 if (common->might_be_empty)
13969   {
13970   JUMPHERE(empty_match);
13971   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
13972   OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
13973   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
13974   JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
13975   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
13976   JUMPTO(SLJIT_ZERO, empty_match_found_label);
13977   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13978   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
13979   JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
13980   }
13981 
13982 common->fast_forward_bc_ptr = NULL;
13983 common->early_fail_start_ptr = 0;
13984 common->early_fail_end_ptr = 0;
13985 common->currententry = common->entries;
13986 common->local_quit_available = TRUE;
13987 quit_label = common->quit_label;
13988 if (common->currententry != NULL)
13989   {
13990   /* A free bit for each private data. */
13991   common->recurse_bitset_size = ((private_data_size / (int)sizeof(sljit_sw)) + 7) >> 3;
13992   SLJIT_ASSERT(common->recurse_bitset_size > 0);
13993   common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);;
13994 
13995   if (common->recurse_bitset != NULL)
13996     {
13997     do
13998       {
13999       /* Might add new entries. */
14000       compile_recurse(common);
14001       if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14002         break;
14003       flush_stubs(common);
14004       common->currententry = common->currententry->next;
14005       }
14006     while (common->currententry != NULL);
14007 
14008     SLJIT_FREE(common->recurse_bitset, allocator_data);
14009     }
14010 
14011   if (common->currententry != NULL)
14012     {
14013     /* The common->recurse_bitset has been freed. */
14014     SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL);
14015 
14016     sljit_free_compiler(compiler);
14017     SLJIT_FREE(common->optimized_cbracket, allocator_data);
14018     SLJIT_FREE(common->private_data_ptrs, allocator_data);
14019     PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14020     return PCRE2_ERROR_NOMEMORY;
14021     }
14022   }
14023 common->local_quit_available = FALSE;
14024 common->quit_label = quit_label;
14025 
14026 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
14027 /* This is a (really) rare case. */
14028 set_jumps(common->stackalloc, LABEL());
14029 /* RETURN_ADDR is not a saved register. */
14030 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14031 
14032 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
14033 
14034 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
14035 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
14036 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
14037 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
14038 OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
14039 
14040 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
14041 
14042 jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
14043 OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
14044 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
14045 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14046 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
14047 OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
14048 
14049 /* Allocation failed. */
14050 JUMPHERE(jump);
14051 /* We break the return address cache here, but this is a really rare case. */
14052 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
14053 JUMPTO(SLJIT_JUMP, common->quit_label);
14054 
14055 /* Call limit reached. */
14056 set_jumps(common->calllimit, LABEL());
14057 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
14058 JUMPTO(SLJIT_JUMP, common->quit_label);
14059 
14060 if (common->revertframes != NULL)
14061   {
14062   set_jumps(common->revertframes, LABEL());
14063   do_revertframes(common);
14064   }
14065 if (common->wordboundary != NULL)
14066   {
14067   set_jumps(common->wordboundary, LABEL());
14068   check_wordboundary(common);
14069   }
14070 if (common->anynewline != NULL)
14071   {
14072   set_jumps(common->anynewline, LABEL());
14073   check_anynewline(common);
14074   }
14075 if (common->hspace != NULL)
14076   {
14077   set_jumps(common->hspace, LABEL());
14078   check_hspace(common);
14079   }
14080 if (common->vspace != NULL)
14081   {
14082   set_jumps(common->vspace, LABEL());
14083   check_vspace(common);
14084   }
14085 if (common->casefulcmp != NULL)
14086   {
14087   set_jumps(common->casefulcmp, LABEL());
14088   do_casefulcmp(common);
14089   }
14090 if (common->caselesscmp != NULL)
14091   {
14092   set_jumps(common->caselesscmp, LABEL());
14093   do_caselesscmp(common);
14094   }
14095 if (common->reset_match != NULL)
14096   {
14097   set_jumps(common->reset_match, LABEL());
14098   do_reset_match(common, (re->top_bracket + 1) * 2);
14099   CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
14100   OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
14101   JUMPTO(SLJIT_JUMP, reset_match_label);
14102   }
14103 #ifdef SUPPORT_UNICODE
14104 #if PCRE2_CODE_UNIT_WIDTH == 8
14105 if (common->utfreadchar != NULL)
14106   {
14107   set_jumps(common->utfreadchar, LABEL());
14108   do_utfreadchar(common);
14109   }
14110 if (common->utfreadtype8 != NULL)
14111   {
14112   set_jumps(common->utfreadtype8, LABEL());
14113   do_utfreadtype8(common);
14114   }
14115 if (common->utfpeakcharback != NULL)
14116   {
14117   set_jumps(common->utfpeakcharback, LABEL());
14118   do_utfpeakcharback(common);
14119   }
14120 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
14121 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
14122 if (common->utfreadchar_invalid != NULL)
14123   {
14124   set_jumps(common->utfreadchar_invalid, LABEL());
14125   do_utfreadchar_invalid(common);
14126   }
14127 if (common->utfreadnewline_invalid != NULL)
14128   {
14129   set_jumps(common->utfreadnewline_invalid, LABEL());
14130   do_utfreadnewline_invalid(common);
14131   }
14132 if (common->utfmoveback_invalid)
14133   {
14134   set_jumps(common->utfmoveback_invalid, LABEL());
14135   do_utfmoveback_invalid(common);
14136   }
14137 if (common->utfpeakcharback_invalid)
14138   {
14139   set_jumps(common->utfpeakcharback_invalid, LABEL());
14140   do_utfpeakcharback_invalid(common);
14141   }
14142 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
14143 if (common->getucd != NULL)
14144   {
14145   set_jumps(common->getucd, LABEL());
14146   do_getucd(common);
14147   }
14148 if (common->getucdtype != NULL)
14149   {
14150   set_jumps(common->getucdtype, LABEL());
14151   do_getucdtype(common);
14152   }
14153 #endif /* SUPPORT_UNICODE */
14154 
14155 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14156 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14157 
14158 executable_func = sljit_generate_code(compiler);
14159 executable_size = sljit_get_generated_code_size(compiler);
14160 sljit_free_compiler(compiler);
14161 
14162 if (executable_func == NULL)
14163   {
14164   PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14165   return PCRE2_ERROR_NOMEMORY;
14166   }
14167 
14168 /* Reuse the function descriptor if possible. */
14169 if (re->executable_jit != NULL)
14170   functions = (executable_functions *)re->executable_jit;
14171 else
14172   {
14173   functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
14174   if (functions == NULL)
14175     {
14176     /* This case is highly unlikely since we just recently
14177     freed a lot of memory. Not impossible though. */
14178     sljit_free_code(executable_func, NULL);
14179     PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14180     return PCRE2_ERROR_NOMEMORY;
14181     }
14182   memset(functions, 0, sizeof(executable_functions));
14183   functions->top_bracket = re->top_bracket + 1;
14184   functions->limit_match = re->limit_match;
14185   re->executable_jit = functions;
14186   }
14187 
14188 /* Turn mode into an index. */
14189 if (mode == PCRE2_JIT_COMPLETE)
14190   mode = 0;
14191 else
14192   mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
14193 
14194 SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
14195 functions->executable_funcs[mode] = executable_func;
14196 functions->read_only_data_heads[mode] = common->read_only_data_head;
14197 functions->executable_sizes[mode] = executable_size;
14198 return 0;
14199 }
14200 
14201 #endif
14202 
14203 /*************************************************
14204 *        JIT compile a Regular Expression        *
14205 *************************************************/
14206 
14207 /* This function used JIT to convert a previously-compiled pattern into machine
14208 code.
14209 
14210 Arguments:
14211   code          a compiled pattern
14212   options       JIT option bits
14213 
14214 Returns:        0: success or (*NOJIT) was used
14215                <0: an error code
14216 */
14217 
14218 #define PUBLIC_JIT_COMPILE_OPTIONS \
14219   (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF)
14220 
14221 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_compile(pcre2_code * code,uint32_t options)14222 pcre2_jit_compile(pcre2_code *code, uint32_t options)
14223 {
14224 pcre2_real_code *re = (pcre2_real_code *)code;
14225 #ifdef SUPPORT_JIT
14226 executable_functions *functions;
14227 static int executable_allocator_is_working = 0;
14228 #endif
14229 
14230 if (code == NULL)
14231   return PCRE2_ERROR_NULL;
14232 
14233 if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14234   return PCRE2_ERROR_JIT_BADOPTION;
14235 
14236 /* Support for invalid UTF was first introduced in JIT, with the option
14237 PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
14238 compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
14239 preferred feature, with the earlier option deprecated. However, for backward
14240 compatibility, if the earlier option is set, it forces the new option so that
14241 if JIT matching falls back to the interpreter, there is still support for
14242 invalid UTF. However, if this function has already been successfully called
14243 without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
14244 non-invalid-supporting JIT code was compiled), give an error.
14245 
14246 If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
14247 actions are needed:
14248 
14249   1. Remove the definition from pcre2.h.in and from the list in
14250      PUBLIC_JIT_COMPILE_OPTIONS above.
14251 
14252   2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
14253 
14254   3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
14255 
14256   4. Delete the following short block of code. The setting of "re" and
14257      "functions" can be moved into the JIT-only block below, but if that is
14258      done, (void)re and (void)functions will be needed in the non-JIT case, to
14259      avoid compiler warnings.
14260 */
14261 
14262 #ifdef SUPPORT_JIT
14263 functions = (executable_functions *)re->executable_jit;
14264 #endif
14265 
14266 if ((options & PCRE2_JIT_INVALID_UTF) != 0)
14267   {
14268   if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
14269     {
14270 #ifdef SUPPORT_JIT
14271     if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
14272 #endif
14273     re->overall_options |= PCRE2_MATCH_INVALID_UTF;
14274     }
14275   }
14276 
14277 /* The above tests are run with and without JIT support. This means that
14278 PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
14279 interpreter support) even in the absence of JIT. But now, if there is no JIT
14280 support, give an error return. */
14281 
14282 #ifndef SUPPORT_JIT
14283 return PCRE2_ERROR_JIT_BADOPTION;
14284 #else  /* SUPPORT_JIT */
14285 
14286 /* There is JIT support. Do the necessary. */
14287 
14288 if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14289 
14290 if (executable_allocator_is_working == 0)
14291   {
14292   /* Checks whether the executable allocator is working. This check
14293      might run multiple times in multi-threaded environments, but the
14294      result should not be affected by it. */
14295   void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
14296 
14297   executable_allocator_is_working = -1;
14298 
14299   if (ptr != NULL)
14300     {
14301     SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
14302     executable_allocator_is_working = 1;
14303     }
14304   }
14305 
14306 if (executable_allocator_is_working < 0)
14307   return PCRE2_ERROR_NOMEMORY;
14308 
14309 if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
14310   options |= PCRE2_JIT_INVALID_UTF;
14311 
14312 if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14313     || functions->executable_funcs[0] == NULL)) {
14314   uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14315   int result = jit_compile(code, options & ~excluded_options);
14316   if (result != 0)
14317     return result;
14318   }
14319 
14320 if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14321     || functions->executable_funcs[1] == NULL)) {
14322   uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14323   int result = jit_compile(code, options & ~excluded_options);
14324   if (result != 0)
14325     return result;
14326   }
14327 
14328 if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14329     || functions->executable_funcs[2] == NULL)) {
14330   uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14331   int result = jit_compile(code, options & ~excluded_options);
14332   if (result != 0)
14333     return result;
14334   }
14335 
14336 return 0;
14337 
14338 #endif  /* SUPPORT_JIT */
14339 }
14340 
14341 /* JIT compiler uses an all-in-one approach. This improves security,
14342    since the code generator functions are not exported. */
14343 
14344 #define INCLUDED_FROM_PCRE2_JIT_COMPILE
14345 
14346 #include "pcre2_jit_match.c"
14347 #include "pcre2_jit_misc.c"
14348 
14349 /* End of pcre2_jit_compile.c */
14350