• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9                     This module by Zoltan Herczeg
10      Original API code Copyright (c) 1997-2012 University of Cambridge
11           New API code Copyright (c) 2016-2019 University of Cambridge
12 
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
16 
17     * Redistributions of source code must retain the above copyright notice,
18       this list of conditions and the following disclaimer.
19 
20     * Redistributions in binary form must reproduce the above copyright
21       notice, this list of conditions and the following disclaimer in the
22       documentation and/or other materials provided with the distribution.
23 
24     * Neither the name of the University of Cambridge nor the names of its
25       contributors may be used to endorse or promote products derived from
26       this software without specific prior written permission.
27 
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
40 */
41 
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45 
46 #include "pcre2_internal.h"
47 
48 #ifdef SUPPORT_JIT
49 
50 /* All-in-one: Since we use the JIT compiler only from here,
51 we just include it. This way we don't need to touch the build
52 system files. */
53 
/* sljit configuration switches used when the compiler source is included
directly into this file. */
#define SLJIT_CONFIG_AUTO 1
#define SLJIT_CONFIG_STATIC 1
#define SLJIT_VERBOSE 0

/* SLJIT_DEBUG mirrors PCRE2_DEBUG. */
#ifndef PCRE2_DEBUG
#define SLJIT_DEBUG 0
#else
#define SLJIT_DEBUG 1
#endif

/* sljit allocates and releases memory through the two wrappers defined below. */
#define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
#define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
66 
pcre2_jit_malloc(size_t size,void * allocator_data)67 static void * pcre2_jit_malloc(size_t size, void *allocator_data)
68 {
69 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
70 return allocator->malloc(size, allocator->memory_data);
71 }
72 
pcre2_jit_free(void * ptr,void * allocator_data)73 static void pcre2_jit_free(void *ptr, void *allocator_data)
74 {
75 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
76 allocator->free(ptr, allocator->memory_data);
77 }
78 
79 #include "sljit/sljitLir.c"
80 
81 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
82 #error Unsupported architecture
83 #endif
84 
/* Debugging switches (all disabled by default). */

/* DEBUG_FORCE_UNOPTIMIZED_CBRAS:
   1 - Use unoptimized capturing brackets.
   2 - Enable capture_last_ptr (includes option 1). */
/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */

/* DEBUG_FORCE_CONTROL_HEAD:
   1 - Always have a control head. */
/* #define DEBUG_FORCE_CONTROL_HEAD 1 */

/* Size of the regex stack that is allocated on the real machine stack.
Fast, but limited in size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for the stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE 8192

/* When sljit debugging is enabled, also enable the check that an
allocation could destroy temporaries. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
106 
107 /*
Short summary about the backtracking mechanism employed by the jit code generator:
109 
110 The code generator follows the recursive nature of the PERL compatible regular
111 expressions. The basic blocks of regular expressions are condition checkers
that execute different commands depending on the result of the condition check.
113 The relationship between the operators can be horizontal (concatenation) and
114 vertical (sub-expression) (See struct backtrack_common for more details).
115 
116   'ab' - 'a' and 'b' regexps are concatenated
117   'a+' - 'a' is the sub-expression of the '+' operator
118 
119 The condition checkers are boolean (true/false) checkers. Machine code is generated
120 for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
124 
125  Greedy star operator (*) :
126    Matching path: match happens.
127    Backtrack path: match failed.
128  Non-greedy star operator (*?) :
129    Matching path: no need to perform a match.
130    Backtrack path: match is required.
131 
The following example shows the code generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:
135 
136    A(B|C)D
137 
138 The generated code will be the following:
139 
140  A matching path
141  '(' matching path (pushing arguments to the stack)
142  B matching path
143  ')' matching path (pushing arguments to the stack)
144  D matching path
145  return with successful match
146 
147  D backtrack path
148  ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
149  B backtrack path
150  C expected path
151  jump to D matching path
152  C backtrack path
153  A backtrack path
154 
 Notice that the order of the backtrack code paths is the opposite of the fast
 code paths. In this way the topmost value on the stack always belongs
 to the current backtrack code path. The backtrack path must check
 whether there is a next alternative. If so, it needs to jump back to
 the matching path eventually. Otherwise it needs to clear out its own stack
 frame and continue the execution on the backtrack code paths.
161 */
162 
163 /*
164 Saved stack frames:
165 
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
170 
171 The stack frames are stored in a chain list, and have the following format:
172 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
173 
174 Thus we can restore the private data to a particular point in the stack.
175 */
176 
177 typedef struct jit_arguments {
178   /* Pointers first. */
179   struct sljit_stack *stack;
180   PCRE2_SPTR str;
181   PCRE2_SPTR begin;
182   PCRE2_SPTR end;
183   pcre2_match_data *match_data;
184   PCRE2_SPTR startchar_ptr;
185   PCRE2_UCHAR *mark_ptr;
186   int (*callout)(pcre2_callout_block *, void *);
187   void *callout_data;
188   /* Everything else after. */
189   sljit_uw offset_limit;
190   sljit_u32 limit_match;
191   sljit_u32 oveccount;
192   sljit_u32 options;
193 } jit_arguments;
194 
195 #define JIT_NUMBER_OF_COMPILE_MODES 3
196 
197 typedef struct executable_functions {
198   void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
199   void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
200   sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
201   sljit_u32 top_bracket;
202   sljit_u32 limit_match;
203 } executable_functions;
204 
/* Singly linked list node that holds one sljit jump. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
209 
/* Singly linked list node that pairs a start jump with a quit label. */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
215 
/* Negative marker values for frame sizes. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
220 
/* Kinds of control entries. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
225 
/* Kinds of early fail handling. */
enum early_fail_types {
  type_skip = 0,
  type_fail = 1,
  type_fail_range = 2
};
231 
232 typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
233 
234 /* The following structure is the key data type for the recursive
235 code generator. It is allocated by compile_matchingpath, and contains
236 the arguments for compile_backtrackingpath. Must be the first member
237 of its descendants. */
238 typedef struct backtrack_common {
239   /* Concatenation stack. */
240   struct backtrack_common *prev;
241   jump_list *nextbacktracks;
242   /* Internal stack (for component operators). */
243   struct backtrack_common *top;
244   jump_list *topbacktracks;
245   /* Opcode pointer. */
246   PCRE2_SPTR cc;
247 } backtrack_common;
248 
249 typedef struct assert_backtrack {
250   backtrack_common common;
251   jump_list *condfailed;
252   /* Less than 0 if a frame is not needed. */
253   int framesize;
254   /* Points to our private memory word on the stack. */
255   int private_data_ptr;
256   /* For iterators. */
257   struct sljit_label *matchingpath;
258 } assert_backtrack;
259 
260 typedef struct bracket_backtrack {
261   backtrack_common common;
262   /* Where to coninue if an alternative is successfully matched. */
263   struct sljit_label *alternative_matchingpath;
264   /* For rmin and rmax iterators. */
265   struct sljit_label *recursive_matchingpath;
266   /* For greedy ? operator. */
267   struct sljit_label *zero_matchingpath;
268   /* Contains the branches of a failed condition. */
269   union {
270     /* Both for OP_COND, OP_SCOND. */
271     jump_list *condfailed;
272     assert_backtrack *assert;
273     /* For OP_ONCE. Less than 0 if not needed. */
274     int framesize;
275     /* For brackets with >3 alternatives. */
276     struct sljit_put_label *matching_put_label;
277   } u;
278   /* Points to our private memory word on the stack. */
279   int private_data_ptr;
280 } bracket_backtrack;
281 
282 typedef struct bracketpos_backtrack {
283   backtrack_common common;
284   /* Points to our private memory word on the stack. */
285   int private_data_ptr;
286   /* Reverting stack is needed. */
287   int framesize;
288   /* Allocated stack size. */
289   int stacksize;
290 } bracketpos_backtrack;
291 
292 typedef struct braminzero_backtrack {
293   backtrack_common common;
294   struct sljit_label *matchingpath;
295 } braminzero_backtrack;
296 
297 typedef struct char_iterator_backtrack {
298   backtrack_common common;
299   /* Next iteration. */
300   struct sljit_label *matchingpath;
301   union {
302     jump_list *backtracks;
303     struct {
304       unsigned int othercasebit;
305       PCRE2_UCHAR chr;
306       BOOL enabled;
307     } charpos;
308   } u;
309 } char_iterator_backtrack;
310 
311 typedef struct ref_iterator_backtrack {
312   backtrack_common common;
313   /* Next iteration. */
314   struct sljit_label *matchingpath;
315 } ref_iterator_backtrack;
316 
317 typedef struct recurse_entry {
318   struct recurse_entry *next;
319   /* Contains the function entry label. */
320   struct sljit_label *entry_label;
321   /* Contains the function entry label. */
322   struct sljit_label *backtrack_label;
323   /* Collects the entry calls until the function is not created. */
324   jump_list *entry_calls;
325   /* Collects the backtrack calls until the function is not created. */
326   jump_list *backtrack_calls;
327   /* Points to the starting opcode. */
328   sljit_sw start;
329 } recurse_entry;
330 
331 typedef struct recurse_backtrack {
332   backtrack_common common;
333   /* Return to the matching path. */
334   struct sljit_label *matchingpath;
335   /* Recursive pattern. */
336   recurse_entry *entry;
337   /* Pattern is inlined. */
338   BOOL inlined_pattern;
339 } recurse_backtrack;
340 
341 #define OP_THEN_TRAP OP_TABLE_LENGTH
342 
343 typedef struct then_trap_backtrack {
344   backtrack_common common;
345   /* If then_trap is not NULL, this structure contains the real
346   then_trap for the backtracking path. */
347   struct then_trap_backtrack *then_trap;
348   /* Points to the starting opcode. */
349   sljit_sw start;
350   /* Exit point for the then opcodes of this alternative. */
351   jump_list *quit;
352   /* Frame size of the current alternative. */
353   int framesize;
354 } then_trap_backtrack;
355 
356 #define MAX_N_CHARS 12
357 #define MAX_DIFF_CHARS 5
358 
359 typedef struct fast_forward_char_data {
360   /* Number of characters in the chars array, 255 for any character. */
361   sljit_u8 count;
362   /* Number of last UTF-8 characters in the chars array. */
363   sljit_u8 last_count;
364   /* Available characters in the current position. */
365   PCRE2_UCHAR chars[MAX_DIFF_CHARS];
366 } fast_forward_char_data;
367 
368 #define MAX_CLASS_RANGE_SIZE 4
369 #define MAX_CLASS_CHARS_SIZE 3
370 
371 typedef struct compiler_common {
372   /* The sljit ceneric compiler. */
373   struct sljit_compiler *compiler;
374   /* Compiled regular expression. */
375   pcre2_real_code *re;
376   /* First byte code. */
377   PCRE2_SPTR start;
378   /* Maps private data offset to each opcode. */
379   sljit_s32 *private_data_ptrs;
380   /* Chain list of read-only data ptrs. */
381   void *read_only_data_head;
382   /* Tells whether the capturing bracket is optimized. */
383   sljit_u8 *optimized_cbracket;
384   /* Tells whether the starting offset is a target of then. */
385   sljit_u8 *then_offsets;
386   /* Current position where a THEN must jump. */
387   then_trap_backtrack *then_trap;
388   /* Starting offset of private data for capturing brackets. */
389   sljit_s32 cbra_ptr;
390   /* Output vector starting point. Must be divisible by 2. */
391   sljit_s32 ovector_start;
392   /* Points to the starting character of the current match. */
393   sljit_s32 start_ptr;
394   /* Last known position of the requested byte. */
395   sljit_s32 req_char_ptr;
396   /* Head of the last recursion. */
397   sljit_s32 recursive_head_ptr;
398   /* First inspected character for partial matching.
399      (Needed for avoiding zero length partial matches.) */
400   sljit_s32 start_used_ptr;
401   /* Starting pointer for partial soft matches. */
402   sljit_s32 hit_start;
403   /* Pointer of the match end position. */
404   sljit_s32 match_end_ptr;
405   /* Points to the marked string. */
406   sljit_s32 mark_ptr;
407   /* Recursive control verb management chain. */
408   sljit_s32 control_head_ptr;
409   /* Points to the last matched capture block index. */
410   sljit_s32 capture_last_ptr;
411   /* Fast forward skipping byte code pointer. */
412   PCRE2_SPTR fast_forward_bc_ptr;
413   /* Locals used by fast fail optimization. */
414   sljit_s32 early_fail_start_ptr;
415   sljit_s32 early_fail_end_ptr;
416 
417   /* Flipped and lower case tables. */
418   const sljit_u8 *fcc;
419   sljit_sw lcc;
420   /* Mode can be PCRE2_JIT_COMPLETE and others. */
421   int mode;
422   /* TRUE, when empty match is accepted for partial matching. */
423   BOOL allow_empty_partial;
424   /* TRUE, when minlength is greater than 0. */
425   BOOL might_be_empty;
426   /* \K is found in the pattern. */
427   BOOL has_set_som;
428   /* (*SKIP:arg) is found in the pattern. */
429   BOOL has_skip_arg;
430   /* (*THEN) is found in the pattern. */
431   BOOL has_then;
432   /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
433   BOOL has_skip_in_assert_back;
434   /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
435   BOOL local_quit_available;
436   /* Currently in a positive assertion. */
437   BOOL in_positive_assertion;
438   /* Newline control. */
439   int nltype;
440   sljit_u32 nlmax;
441   sljit_u32 nlmin;
442   int newline;
443   int bsr_nltype;
444   sljit_u32 bsr_nlmax;
445   sljit_u32 bsr_nlmin;
446   /* Dollar endonly. */
447   int endonly;
448   /* Tables. */
449   sljit_sw ctypes;
450   /* Named capturing brackets. */
451   PCRE2_SPTR name_table;
452   sljit_sw name_count;
453   sljit_sw name_entry_size;
454 
455   /* Labels and jump lists. */
456   struct sljit_label *partialmatchlabel;
457   struct sljit_label *quit_label;
458   struct sljit_label *abort_label;
459   struct sljit_label *accept_label;
460   struct sljit_label *ff_newline_shortcut;
461   stub_list *stubs;
462   recurse_entry *entries;
463   recurse_entry *currententry;
464   jump_list *partialmatch;
465   jump_list *quit;
466   jump_list *positive_assertion_quit;
467   jump_list *abort;
468   jump_list *failed_match;
469   jump_list *accept;
470   jump_list *calllimit;
471   jump_list *stackalloc;
472   jump_list *revertframes;
473   jump_list *wordboundary;
474   jump_list *anynewline;
475   jump_list *hspace;
476   jump_list *vspace;
477   jump_list *casefulcmp;
478   jump_list *caselesscmp;
479   jump_list *reset_match;
480   BOOL unset_backref;
481   BOOL alt_circumflex;
482 #ifdef SUPPORT_UNICODE
483   BOOL utf;
484   BOOL invalid_utf;
485   BOOL ucp;
486   /* Points to saving area for iref. */
487   sljit_s32 iref_ptr;
488   jump_list *getucd;
489   jump_list *getucdtype;
490 #if PCRE2_CODE_UNIT_WIDTH == 8
491   jump_list *utfreadchar;
492   jump_list *utfreadtype8;
493   jump_list *utfpeakcharback;
494 #endif
495 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
496   jump_list *utfreadchar_invalid;
497   jump_list *utfreadnewline_invalid;
498   jump_list *utfmoveback_invalid;
499   jump_list *utfpeakcharback_invalid;
500 #endif
501 #endif /* SUPPORT_UNICODE */
502 } compiler_common;
503 
/* Context used by byte_sequence_compare. The character unions exist only
when SLJIT_UNALIGNED is set; c and oc have the same layout. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
537 
/* Drop the sljit definition of CMP; this file defines its own later. */
#undef CMP

/* Byte offset of the i-th machine word; used for accessing stack elements. */
#define STACK(i)      ((i) * (int)sizeof(sljit_sw))

/* Only R2 and R3 are accepted as the preferred shift register. */
#ifdef SLJIT_PREF_SHIFT_REG
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
/* Nothing. */
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
#define SHIFT_REG_IS_R3
#else
#error "Unsupported shift register"
#endif
#endif

/* Register assignment. TMP2 and TMP3 swap R2/R3 when SHIFT_REG_IS_R3 is set. */
#define TMP1          SLJIT_R0
#ifndef SHIFT_REG_IS_R3
#define TMP2          SLJIT_R2
#define TMP3          SLJIT_R3
#else
#define TMP2          SLJIT_R3
#define TMP3          SLJIT_R2
#endif
#define STR_PTR       SLJIT_R1
#define STR_END       SLJIT_S0
#define STACK_TOP     SLJIT_S1
#define STACK_LIMIT   SLJIT_S2
#define COUNT_MATCH   SLJIT_S3
#define ARGUMENTS     SLJIT_S4
#define RETURN_ADDR   SLJIT_R4

/* HAS_VIRTUAL_REGISTERS is 1 only for the 32 bit x86 configuration. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#define HAS_VIRTUAL_REGISTERS 1
#else
#define HAS_VIRTUAL_REGISTERS 0
#endif
575 
/* Layout of the local space. */
/* Two locals that the current opcode is free to use. */
#define LOCALS0          (0 * sizeof(sljit_sw))
#define LOCALS1          (1 * sizeof(sljit_sw))
/* Two locals for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_sw))
#define POSSESSIVE1      (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH      (4 * sizeof(sljit_sw))
/* The output vector lives on the stack and holds character pointers.
Its data is split into two groups: the first holds the start / end
character pointers, the second holds the start pointers of capturing
groups whose end has not been reached yet. These macros expect a
variable named common to be in scope. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
593 
594 #if PCRE2_CODE_UNIT_WIDTH == 8
595 #define MOV_UCHAR  SLJIT_MOV_U8
596 #define IN_UCHARS(x) (x)
597 #elif PCRE2_CODE_UNIT_WIDTH == 16
598 #define MOV_UCHAR  SLJIT_MOV_U16
599 #define UCHAR_SHIFT (1)
600 #define IN_UCHARS(x) ((x) * 2)
601 #elif PCRE2_CODE_UNIT_WIDTH == 32
602 #define MOV_UCHAR  SLJIT_MOV_U32
603 #define UCHAR_SHIFT (2)
604 #define IN_UCHARS(x) ((x) * 4)
605 #else
606 #error Unsupported compiling mode
607 #endif
608 
/* Shorthand wrappers around the sljit emitter calls. DEFINE_COMPILER
declares the local variable named compiler that all the other wrappers
pass as their first argument; it expects a variable named common in scope. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define OP_SRC(op, src, srcw) \
  sljit_emit_op_src(compiler, (op), (src), (srcw))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Binds an existing jump to a label emitted at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare and binds it to an existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define CMOV(type, dst_reg, src, srcw) \
  sljit_emit_cmov(compiler, (type), (dst_reg), (src), (srcw))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
638 
/* Character value constants. */
#define READ_CHAR_MAX 0x7fffffff

#define INVALID_UTF_CHAR -1
#define UNASSIGNED_UTF_CHAR 888
643 
644 #if defined SUPPORT_UNICODE
645 #if PCRE2_CODE_UNIT_WIDTH == 8
646 
/* Reads one character forward from possibly invalid UTF-8 (8 bit code units).

  c               lvalue that receives the code point
  ptr             lvalue pointer to the current code unit; advanced past the
                  character when a complete sequence was consumed
  end             end of the subject; continuation bytes are read only
                  below this address
  invalid_action  statement(s) executed when the sequence is malformed

The caller must guarantee that ptr[0] is readable. Truncated sequences,
stray continuation bytes and lead bytes outside 0xc2..0xf4 run
invalid_action without moving ptr. Overlong 3/4 byte forms, surrogates and
values above 0x10ffff run invalid_action after ptr has already been
advanced. c may hold a partial value when invalid_action runs. All macro
arguments are parenthesized so that expression arguments expand safely. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if ((ptr)[0] <= 0x7f) \
    (c) = *(ptr)++; \
  else if ((ptr) + 1 < (end) && (ptr)[1] >= 0x80 && (ptr)[1] < 0xc0) \
    { \
    (c) = (ptr)[1] - 0x80; \
    \
    if ((ptr)[0] >= 0xc2 && (ptr)[0] <= 0xdf) \
      { \
      (c) |= ((ptr)[0] - 0xc0) << 6; \
      (ptr) += 2; \
      } \
    else if ((ptr) + 2 < (end) && (ptr)[2] >= 0x80 && (ptr)[2] < 0xc0) \
      { \
      (c) = (c) << 6 | ((ptr)[2] - 0x80); \
      \
      if ((ptr)[0] >= 0xe0 && (ptr)[0] <= 0xef) \
        { \
        (c) |= ((ptr)[0] - 0xe0) << 12; \
        (ptr) += 3; \
        \
        if ((c) < 0x800 || ((c) >= 0xd800 && (c) < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if ((ptr) + 3 < (end) && (ptr)[3] >= 0x80 && (ptr)[3] < 0xc0) \
        { \
        (c) = (c) << 6 | ((ptr)[3] - 0x80); \
        \
        if ((ptr)[0] >= 0xf0 && (ptr)[0] <= 0xf4) \
          { \
          (c) |= ((ptr)[0] - 0xf0) << 18; \
          (ptr) += 4; \
          \
          if ((c) >= 0x110000 || (c) < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
708 
/* Reads one character backward from possibly invalid UTF-8 (8 bit code units).

  c               lvalue that receives the code point
  ptr             lvalue pointer just past the character to read; moved back
                  to the start of the character when a complete sequence
                  was consumed
  start           start of the subject; earlier bytes are read only above
                  this address
  invalid_action  statement(s) executed when the sequence is malformed

The caller must guarantee that ptr[-1] is readable. ptr[-1] holds the lowest
six bits of a multi-byte character, so each earlier byte is OR-ed in at a
progressively higher shift (6, 12, 18). The previous form shifted the
accumulated value left instead, which reversed the bit groups of 3 and 4
byte sequences (E2 82 AC produced 0x2b02 rather than 0x20ac).

Truncated sequences and unexpected bytes run invalid_action without moving
ptr. Overlong 3/4 byte forms, surrogates and values above 0x10ffff run
invalid_action after ptr has already been moved. c may hold a partial value
when invalid_action runs. All macro arguments are parenthesized so that
expression arguments expand safely. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  (c) = (ptr)[-1]; \
  if ((c) <= 0x7f) \
    (ptr)--; \
  else if ((ptr) - 1 > (start) && (ptr)[-1] >= 0x80 && (ptr)[-1] < 0xc0) \
    { \
    (c) -= 0x80; \
    \
    if ((ptr)[-2] >= 0xc2 && (ptr)[-2] <= 0xdf) \
      { \
      (c) |= ((ptr)[-2] - 0xc0) << 6; \
      (ptr) -= 2; \
      } \
    else if ((ptr) - 2 > (start) && (ptr)[-2] >= 0x80 && (ptr)[-2] < 0xc0) \
      { \
      (c) |= ((ptr)[-2] - 0x80) << 6; \
      \
      if ((ptr)[-3] >= 0xe0 && (ptr)[-3] <= 0xef) \
        { \
        (c) |= ((ptr)[-3] - 0xe0) << 12; \
        (ptr) -= 3; \
        \
        if ((c) < 0x800 || ((c) >= 0xd800 && (c) < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if ((ptr) - 3 > (start) && (ptr)[-3] >= 0x80 && (ptr)[-3] < 0xc0) \
        { \
        (c) |= ((ptr)[-3] - 0x80) << 12; \
        \
        if ((ptr)[-4] >= 0xf0 && (ptr)[-4] <= 0xf4) \
          { \
          (c) |= ((ptr)[-4] - 0xf0) << 18; \
          (ptr) -= 4; \
          \
          if ((c) >= 0x110000 || (c) < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
771 
772 #elif PCRE2_CODE_UNIT_WIDTH == 16
773 
/* Reads one character forward from possibly invalid UTF-16.

  c               lvalue that receives the code point
  ptr             lvalue pointer to the current code unit; advanced by one
                  unit, or by two for a surrogate pair
  end             end of the subject; the low surrogate is read only below
                  this address
  invalid_action  statement(s) executed for an unpaired surrogate; ptr and c
                  are left untouched in that case

The caller must guarantee that ptr[0] is readable. All macro arguments are
parenthesized so that expression arguments expand safely. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if ((ptr)[0] < 0xd800 || (ptr)[0] >= 0xe000) \
    (c) = *(ptr)++; \
  else if ((ptr)[0] < 0xdc00 && (ptr) + 1 < (end) && (ptr)[1] >= 0xdc00 && (ptr)[1] < 0xe000) \
    { \
    (c) = ((((ptr)[0] - 0xd800) << 10) | ((ptr)[1] - 0xdc00)) + 0x10000; \
    (ptr) += 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
788 
/* Reads one character backward from possibly invalid UTF-16.

  c               lvalue that receives the code point (it is set to ptr[-1]
                  even when invalid_action runs)
  ptr             lvalue pointer just past the character to read; moved back
                  by one unit, or by two for a surrogate pair
  start           start of the subject; the high surrogate is read only
                  above this address
  invalid_action  statement(s) executed for an unpaired surrogate; ptr is
                  left untouched in that case

The caller must guarantee that ptr[-1] is readable. All macro arguments are
parenthesized so that expression arguments expand safely. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  (c) = (ptr)[-1]; \
  if ((c) < 0xd800 || (c) >= 0xe000) \
    (ptr)--; \
  else if ((c) >= 0xdc00 && (ptr) - 1 > (start) && (ptr)[-2] >= 0xd800 && (ptr)[-2] < 0xdc00) \
    { \
    (c) = ((((ptr)[-2] - 0xd800) << 10) | ((c) - 0xdc00)) + 0x10000; \
    (ptr) -= 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }

804 
805 
806 #elif PCRE2_CODE_UNIT_WIDTH == 32
807 
/* Reads one character forward from possibly invalid UTF-32.

  c               lvalue that receives the code point
  ptr             lvalue pointer to the current code unit; advanced by one
                  on success
  end             not used by this variant
  invalid_action  statement(s) executed for a surrogate value or a value of
                  0x110000 and above; ptr and c are left untouched then

The caller must guarantee that ptr[0] is readable. All macro arguments are
parenthesized so that expression arguments expand safely. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if ((ptr)[0] < 0xd800 || ((ptr)[0] >= 0xe000 && (ptr)[0] < 0x110000)) \
    (c) = *(ptr)++; \
  else \
    { \
    invalid_action; \
    } \
  }
817 
/* Reads one character backward from possibly invalid UTF-32.

  c               lvalue that receives ptr[-1] (also when invalid_action runs)
  ptr             lvalue pointer just past the character to read; moved back
                  by one on success
  start           not used by this variant
  invalid_action  statement(s) executed for a surrogate value or a value of
                  0x110000 and above; ptr is left untouched then

The caller must guarantee that ptr[-1] is readable. All macro arguments are
parenthesized so that expression arguments expand safely. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  (c) = (ptr)[-1]; \
  if ((ptr)[-1] < 0xd800 || ((ptr)[-1] >= 0xe000 && (ptr)[-1] < 0x110000)) \
    (ptr)--; \
  else \
    { \
    invalid_action; \
    } \
  }
828 
829 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
830 #endif /* SUPPORT_UNICODE */
831 
/* Returns the address just past the closing KET of the bracket that starts
at cc. Follows the link stored at offset 1 through every OP_ALT, then steps
over the closing opcode and its link (1 + LINK_SIZE code units). */
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

cc += GET(cc, 1);
while (*cc == OP_ALT)
  cc += GET(cc, 1);

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return cc + 1 + LINK_SIZE;
}
840 
/* Counts the alternatives of the bracket that starts at cc: one for the
first branch plus one for every OP_ALT reached by following the links
stored at offset 1. */
static int no_alternatives(PCRE2_SPTR cc)
{
int alternatives = 1;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

for (cc += GET(cc, 1); *cc == OP_ALT; cc += GET(cc, 1))
  alternatives++;

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return alternatives;
}
854 
/* Functions which might need modification for every newly supported opcode:
856  next_opcode
857  check_opcode_types
858  set_private_data_ptrs
859  get_framesize
860  init_frame
861  get_recurse_data_length
862  copy_recurse_data
863  compile_matchingpath
864  compile_backtrackingpath
865 */
866 
/* Returns the address of the opcode that follows the one at cc.

Returns NULL for OP_ANYBYTE when common->utf is set (Unicode builds only)
and for any opcode that is not listed below. */
static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
PCRE2_SPTR next;

SLJIT_UNUSED_ARG(common);

switch(*cc)
  {
  /* Opcodes whose length is taken directly from the OP_lengths table. */
  case OP_SOD: case OP_SOM: case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT: case OP_DIGIT:
  case OP_NOT_WHITESPACE: case OP_WHITESPACE:
  case OP_NOT_WORDCHAR: case OP_WORDCHAR:
  case OP_ANY: case OP_ALLANY:
  case OP_NOTPROP: case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE: case OP_HSPACE:
  case OP_NOT_VSPACE: case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN: case OP_EOD:
  case OP_CIRC: case OP_CIRCM:
  case OP_DOLL: case OP_DOLLM:
  case OP_CRSTAR: case OP_CRMINSTAR:
  case OP_CRPLUS: case OP_CRMINPLUS:
  case OP_CRQUERY: case OP_CRMINQUERY:
  case OP_CRRANGE: case OP_CRMINRANGE:
  case OP_CRPOSSTAR: case OP_CRPOSPLUS:
  case OP_CRPOSQUERY: case OP_CRPOSRANGE:
  case OP_CLASS: case OP_NCLASS:
  case OP_REF: case OP_REFI:
  case OP_DNREF: case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS:
  case OP_REVERSE:
  case OP_ASSERT: case OP_ASSERT_NOT:
  case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
  case OP_ASSERT_NA: case OP_ASSERTBACK_NA:
  case OP_ONCE:
  case OP_SCRIPT_RUN:
  case OP_BRA: case OP_BRAPOS:
  case OP_CBRA: case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA: case OP_SBRAPOS:
  case OP_SCBRA: case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF: case OP_DNCREF:
  case OP_RREF: case OP_DNRREF:
  case OP_FALSE: case OP_TRUE:
  case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
  case OP_PRUNE: case OP_SKIP: case OP_THEN: case OP_COMMIT:
  case OP_FAIL:
  case OP_ACCEPT: case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  return cc + PRIV(OP_lengths)[*cc];

  /* Opcodes that end with a character: in UTF mode the last code unit
  covered by the table length may be followed by extra code units. */
  case OP_CHAR: case OP_CHARI:
  case OP_NOT: case OP_NOTI:
  case OP_STAR: case OP_MINSTAR:
  case OP_PLUS: case OP_MINPLUS:
  case OP_QUERY: case OP_MINQUERY:
  case OP_UPTO: case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
  case OP_STARI: case OP_MINSTARI:
  case OP_PLUSI: case OP_MINPLUSI:
  case OP_QUERYI: case OP_MINQUERYI:
  case OP_UPTOI: case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
  case OP_NOTSTAR: case OP_NOTMINSTAR:
  case OP_NOTPLUS: case OP_NOTMINPLUS:
  case OP_NOTQUERY: case OP_NOTMINQUERY:
  case OP_NOTUPTO: case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR: case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
  case OP_NOTSTARI: case OP_NOTMINSTARI:
  case OP_NOTPLUSI: case OP_NOTMINPLUSI:
  case OP_NOTQUERYI: case OP_NOTMINQUERYI:
  case OP_NOTUPTOI: case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
  next = cc + PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(next[-1])) next += GET_EXTRALEN(next[-1]);
#endif
  return next;

  /* Special cases: the type iterators stop one code unit before the
  table length. */
  case OP_TYPESTAR: case OP_TYPEMINSTAR:
  case OP_TYPEPLUS: case OP_TYPEMINPLUS:
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  case OP_ANYBYTE:
#ifdef SUPPORT_UNICODE
  if (common->utf) return NULL;
#endif
  return cc + 1;

  /* The total length is stored inside the opcode itself. */
  case OP_CALLOUT_STR:
  return cc + GET(cc, 1 + 2*LINK_SIZE);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* Verbs with an argument: cc[1] is added on top of three fixed code units. */
  case OP_MARK:
  case OP_COMMIT_ARG:
  case OP_PRUNE_ARG:
  case OP_SKIP_ARG:
  case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  default:
  SLJIT_UNREACHABLE();
  return NULL;
  }
}
1067 
check_opcode_types(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend)1068 static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
1069 {
1070 int count;
1071 PCRE2_SPTR slot;
1072 PCRE2_SPTR assert_back_end = cc - 1;
1073 PCRE2_SPTR assert_na_end = cc - 1;
1074 
1075 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
1076 while (cc < ccend)
1077   {
1078   switch(*cc)
1079     {
1080     case OP_SET_SOM:
1081     common->has_set_som = TRUE;
1082     common->might_be_empty = TRUE;
1083     cc += 1;
1084     break;
1085 
1086     case OP_REFI:
1087 #ifdef SUPPORT_UNICODE
1088     if (common->iref_ptr == 0)
1089       {
1090       common->iref_ptr = common->ovector_start;
1091       common->ovector_start += 3 * sizeof(sljit_sw);
1092       }
1093 #endif /* SUPPORT_UNICODE */
1094     /* Fall through. */
1095     case OP_REF:
1096     common->optimized_cbracket[GET2(cc, 1)] = 0;
1097     cc += 1 + IMM2_SIZE;
1098     break;
1099 
1100     case OP_ASSERT_NA:
1101     case OP_ASSERTBACK_NA:
1102     slot = bracketend(cc);
1103     if (slot > assert_na_end)
1104       assert_na_end = slot;
1105     cc += 1 + LINK_SIZE;
1106     break;
1107 
1108     case OP_CBRAPOS:
1109     case OP_SCBRAPOS:
1110     common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
1111     cc += 1 + LINK_SIZE + IMM2_SIZE;
1112     break;
1113 
1114     case OP_COND:
1115     case OP_SCOND:
1116     /* Only AUTO_CALLOUT can insert this opcode. We do
1117        not intend to support this case. */
1118     if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
1119       return FALSE;
1120     cc += 1 + LINK_SIZE;
1121     break;
1122 
1123     case OP_CREF:
1124     common->optimized_cbracket[GET2(cc, 1)] = 0;
1125     cc += 1 + IMM2_SIZE;
1126     break;
1127 
1128     case OP_DNREF:
1129     case OP_DNREFI:
1130     case OP_DNCREF:
1131     count = GET2(cc, 1 + IMM2_SIZE);
1132     slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
1133     while (count-- > 0)
1134       {
1135       common->optimized_cbracket[GET2(slot, 0)] = 0;
1136       slot += common->name_entry_size;
1137       }
1138     cc += 1 + 2 * IMM2_SIZE;
1139     break;
1140 
1141     case OP_RECURSE:
1142     /* Set its value only once. */
1143     if (common->recursive_head_ptr == 0)
1144       {
1145       common->recursive_head_ptr = common->ovector_start;
1146       common->ovector_start += sizeof(sljit_sw);
1147       }
1148     cc += 1 + LINK_SIZE;
1149     break;
1150 
1151     case OP_CALLOUT:
1152     case OP_CALLOUT_STR:
1153     if (common->capture_last_ptr == 0)
1154       {
1155       common->capture_last_ptr = common->ovector_start;
1156       common->ovector_start += sizeof(sljit_sw);
1157       }
1158     cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
1159     break;
1160 
1161     case OP_ASSERTBACK:
1162     slot = bracketend(cc);
1163     if (slot > assert_back_end)
1164       assert_back_end = slot;
1165     cc += 1 + LINK_SIZE;
1166     break;
1167 
1168     case OP_THEN_ARG:
1169     common->has_then = TRUE;
1170     common->control_head_ptr = 1;
1171     /* Fall through. */
1172 
1173     case OP_COMMIT_ARG:
1174     case OP_PRUNE_ARG:
1175     if (cc < assert_na_end)
1176       return FALSE;
1177     /* Fall through */
1178     case OP_MARK:
1179     if (common->mark_ptr == 0)
1180       {
1181       common->mark_ptr = common->ovector_start;
1182       common->ovector_start += sizeof(sljit_sw);
1183       }
1184     cc += 1 + 2 + cc[1];
1185     break;
1186 
1187     case OP_THEN:
1188     common->has_then = TRUE;
1189     common->control_head_ptr = 1;
1190     cc += 1;
1191     break;
1192 
1193     case OP_SKIP:
1194     if (cc < assert_back_end)
1195       common->has_skip_in_assert_back = TRUE;
1196     if (cc < assert_na_end)
1197       return FALSE;
1198     cc += 1;
1199     break;
1200 
1201     case OP_SKIP_ARG:
1202     common->control_head_ptr = 1;
1203     common->has_skip_arg = TRUE;
1204     if (cc < assert_back_end)
1205       common->has_skip_in_assert_back = TRUE;
1206     if (cc < assert_na_end)
1207       return FALSE;
1208     cc += 1 + 2 + cc[1];
1209     break;
1210 
1211     case OP_PRUNE:
1212     case OP_COMMIT:
1213     case OP_ASSERT_ACCEPT:
1214     if (cc < assert_na_end)
1215       return FALSE;
1216     cc++;
1217     break;
1218 
1219     default:
1220     cc = next_opcode(common, cc);
1221     if (cc == NULL)
1222       return FALSE;
1223     break;
1224     }
1225   }
1226 return TRUE;
1227 }
1228 
/* Value returned by detect_early_fail() when no further iterator can be
enhanced; its 'start' argument must always be below this value. */
#define EARLY_FAIL_ENHANCE_MAX (1 + 1)

/*
Meaning of the 'start' argument of detect_early_fail():
  0 - skip / early fail allowed
  1 - only early fail with range allowed
  >1 - (start - 1) early fail is processed

return: current number of iterators enhanced with fast fail
*/
/* Looks for iterators near the start of a bracket that can be enhanced
with "skip" or "early fail" support.

Arguments:
  common              compiler state; private_data_ptrs[], fast_forward_bc_ptr,
                      early_fail_start_ptr and early_fail_end_ptr are updated
  cc                  must point to OP_ONCE, OP_BRA or an OP_CBRA whose
                      optimized_cbracket[] entry is non-zero
  private_data_start  next free private data offset; advanced by one word for
                      each skip / fail entry and by two for a fail range entry
  depth               nesting level; brackets that cannot be stepped into
                      directly are recursed into only while depth < 4
  start               initial state, must be < EARLY_FAIL_ENHANCE_MAX

Every alternative of the bracket is scanned separately, each beginning with
count = start. For a selected iterator the entry stored in
private_data_ptrs[] (at the index of the code unit following the iterator
opcode) is the private data offset shifted left by 3, or'ed with type_skip,
type_fail or type_fail_range.

Returns the largest count reached by any alternative, or
EARLY_FAIL_ENHANCE_MAX when scanning stopped before the end of an
alternative or *private_data_start grew above SLJIT_MAX_LOCAL_SIZE. */
static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth, int start)
{
PCRE2_SPTR next_alt;
PCRE2_SPTR end;
PCRE2_SPTR accelerated_start;
int result = 0;
int count;
BOOL fast_forward_allowed = TRUE;

SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);

do
  {
  count = start;
  next_alt = cc + GET(cc, 1);
  /* Step over the bracket / alternative header. */
  cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);

  while (TRUE)
    {
    /* Set by the cases below when the current opcode is a candidate. */
    accelerated_start = NULL;

    switch(*cc)
      {
      case OP_SOD:
      case OP_SOM:
      case OP_SET_SOM:
      case OP_NOT_WORD_BOUNDARY:
      case OP_WORD_BOUNDARY:
      case OP_EODN:
      case OP_EOD:
      case OP_CIRC:
      case OP_CIRCM:
      case OP_DOLL:
      case OP_DOLLM:
      /* Zero width assertions. */
      cc++;
      continue;

      case OP_NOT_DIGIT:
      case OP_DIGIT:
      case OP_NOT_WHITESPACE:
      case OP_WHITESPACE:
      case OP_NOT_WORDCHAR:
      case OP_WORDCHAR:
      case OP_ANY:
      case OP_ALLANY:
      case OP_ANYBYTE:
      case OP_NOT_HSPACE:
      case OP_HSPACE:
      case OP_NOT_VSPACE:
      case OP_VSPACE:
      /* Single character types: only the skip variant is disabled. */
      fast_forward_allowed = FALSE;
      cc++;
      continue;

      case OP_ANYNL:
      case OP_EXTUNI:
      fast_forward_allowed = FALSE;
      if (count == 0)
        count = 1;
      cc++;
      continue;

      case OP_NOTPROP:
      case OP_PROP:
      fast_forward_allowed = FALSE;
      cc += 1 + 2;
      continue;

      case OP_CHAR:
      case OP_CHARI:
      case OP_NOT:
      case OP_NOTI:
      fast_forward_allowed = FALSE;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_TYPESTAR:
      case OP_TYPEMINSTAR:
      case OP_TYPEPLUS:
      case OP_TYPEMINPLUS:
      case OP_TYPEPOSSTAR:
      case OP_TYPEPOSPLUS:
      /* The type or prop opcode is skipped in the next iteration. */
      cc += 1;

      if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
        {
        /* Candidate: the iterator opcode itself is one unit back. */
        accelerated_start = cc - 1;
        break;
        }

      if (count == 0)
        count = 1;
      fast_forward_allowed = FALSE;
      continue;

      case OP_TYPEUPTO:
      case OP_TYPEMINUPTO:
      case OP_TYPEEXACT:
      case OP_TYPEPOSUPTO:
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_TYPEQUERY:
      case OP_TYPEMINQUERY:
      case OP_TYPEPOSQUERY:
      /* The type or prop opcode is skipped in the next iteration. */
      fast_forward_allowed = FALSE;
      if (count == 0)
        count = 1;
      cc += 1;
      continue;

      case OP_STAR:
      case OP_MINSTAR:
      case OP_PLUS:
      case OP_MINPLUS:
      case OP_POSSTAR:
      case OP_POSPLUS:

      case OP_STARI:
      case OP_MINSTARI:
      case OP_PLUSI:
      case OP_MINPLUSI:
      case OP_POSSTARI:
      case OP_POSPLUSI:

      case OP_NOTSTAR:
      case OP_NOTMINSTAR:
      case OP_NOTPLUS:
      case OP_NOTMINPLUS:
      case OP_NOTPOSSTAR:
      case OP_NOTPOSPLUS:

      case OP_NOTSTARI:
      case OP_NOTMINSTARI:
      case OP_NOTPLUSI:
      case OP_NOTMINPLUSI:
      case OP_NOTPOSSTARI:
      case OP_NOTPOSPLUSI:
      /* Unbounded single character iterators are candidates. */
      accelerated_start = cc;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      case OP_UPTO:
      case OP_MINUPTO:
      case OP_EXACT:
      case OP_POSUPTO:
      case OP_UPTOI:
      case OP_MINUPTOI:
      case OP_EXACTI:
      case OP_POSUPTOI:
      case OP_NOTUPTO:
      case OP_NOTMINUPTO:
      case OP_NOTEXACT:
      case OP_NOTPOSUPTO:
      case OP_NOTUPTOI:
      case OP_NOTMINUPTOI:
      case OP_NOTEXACTI:
      case OP_NOTPOSUPTOI:
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_QUERY:
      case OP_MINQUERY:
      case OP_POSQUERY:
      case OP_QUERYI:
      case OP_MINQUERYI:
      case OP_POSQUERYI:
      case OP_NOTQUERY:
      case OP_NOTMINQUERY:
      case OP_NOTPOSQUERY:
      case OP_NOTQUERYI:
      case OP_NOTMINQUERYI:
      case OP_NOTPOSQUERYI:
      /* Bounded iterators are stepped over, never selected. */
      fast_forward_allowed = FALSE;
      if (count == 0)
        count = 1;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
      case OP_XCLASS:
      accelerated_start = cc;
      cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))));
#else
      accelerated_start = cc;
      cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
#endif

      /* cc now points to the opcode that follows the class data. */
      switch (*cc)
        {
        case OP_CRSTAR:
        case OP_CRMINSTAR:
        case OP_CRPLUS:
        case OP_CRMINPLUS:
        case OP_CRPOSSTAR:
        case OP_CRPOSPLUS:
        /* Unbounded class repeat: keep the class as a candidate. */
        cc++;
        break;

        case OP_CRRANGE:
        case OP_CRMINRANGE:
        case OP_CRPOSRANGE:
        cc += 2 * IMM2_SIZE;
        /* Fall through */
        case OP_CRQUERY:
        case OP_CRMINQUERY:
        case OP_CRPOSQUERY:
        cc++;
        if (count == 0)
          count = 1;
        /* Fall through */
        default:
        /* Bounded repeat or no repeat at all: not a candidate.
        The 'continue' restarts the enclosing while loop. */
        accelerated_start = NULL;
        fast_forward_allowed = FALSE;
        continue;
        }
      break;

      case OP_ONCE:
      case OP_BRA:
      case OP_CBRA:
      end = cc + GET(cc, 1);

      /* Bracket with a single alternative and a plain OP_KET that has no
      private data: step into it and keep scanning in this loop. */
      if (*end == OP_KET && PRIVATE_DATA(end) == 0)
        {
        if (*cc == OP_CBRA)
          {
          if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
            break;
          cc += IMM2_SIZE;
          }

        cc += 1 + LINK_SIZE;
        continue;
        }

      fast_forward_allowed = FALSE;
      if (depth >= 4)
        break;

      /* Otherwise the bracket is processed recursively, provided that it
      is closed by a plain OP_KET without private data. */
      end = bracketend(cc) - (1 + LINK_SIZE);
      if (*end != OP_KET || PRIVATE_DATA(end) != 0)
        break;

      if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
        break;

      count = detect_early_fail(common, cc, private_data_start, depth + 1, count);
      if (count < EARLY_FAIL_ENHANCE_MAX)
        {
        cc = end + (1 + LINK_SIZE);
        continue;
        }
      break;

      case OP_KET:
      SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
      /* A KET at or after next_alt ends this alternative; an earlier one
      belongs to a bracket that was stepped into above and is skipped. */
      if (cc >= next_alt)
        break;
      cc += 1 + LINK_SIZE;
      continue;
      }

    if (accelerated_start != NULL)
      {
      if (count == 0)
        {
        count++;

        /* The skip variant is used only when nothing that disables it
        precedes the iterator and the bracket has a single alternative. */
        if (fast_forward_allowed && *next_alt == OP_KET)
          {
          common->fast_forward_bc_ptr = accelerated_start;
          common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
          *private_data_start += sizeof(sljit_sw);
          }
        else
          {
          common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;

          if (common->early_fail_start_ptr == 0)
            common->early_fail_start_ptr = *private_data_start;

          *private_data_start += sizeof(sljit_sw);
          common->early_fail_end_ptr = *private_data_start;

          if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
            return EARLY_FAIL_ENHANCE_MAX;
          }
        }
      else
        {
        /* The range variant occupies two words. */
        common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;

        if (common->early_fail_start_ptr == 0)
          common->early_fail_start_ptr = *private_data_start;

        *private_data_start += 2 * sizeof(sljit_sw);
        common->early_fail_end_ptr = *private_data_start;

        if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
          return EARLY_FAIL_ENHANCE_MAX;
        }

      count++;

      if (count < EARLY_FAIL_ENHANCE_MAX)
        continue;
      }

    break;
    }

  /* Stopping anywhere else than the end of the alternative means that
  nothing after this bracket may be enhanced. */
  if (*cc != OP_ALT && *cc != OP_KET)
    result = EARLY_FAIL_ENHANCE_MAX;
  else if (result < count)
    result = count;

  /* Skip is considered only for the first alternative. */
  fast_forward_allowed = FALSE;
  cc = next_alt;
  }
while (*cc == OP_ALT);

return result;
}
1579 
/* Returns the number of private data words needed by the repeat opcode
at cc, which follows a character class (set_private_data_ptrs() multiplies
the result by sizeof(sljit_sw)).

  OP_CRSTAR, OP_CRPLUS                      2
  OP_CRMINSTAR, OP_CRMINPLUS,
  OP_CRQUERY, OP_CRMINQUERY                 1
  OP_CRRANGE, OP_CRMINRANGE with max == 0   2 for OP_CRRANGE, otherwise 1
  OP_CRRANGE, OP_CRMINRANGE with max != 0   max - min, but at most 2
  anything else                             0

NOTE(review): max == 0 presumably encodes "no upper limit" - confirm
against the opcode documentation. */
static int get_class_iterator_size(PCRE2_SPTR cc)
{
sljit_u32 min;
sljit_u32 max;
switch(*cc)
  {
  case OP_CRSTAR:
  case OP_CRPLUS:
  return 2;

  case OP_CRMINSTAR:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  return 1;

  case OP_CRRANGE:
  case OP_CRMINRANGE:
  min = GET2(cc, 1);
  max = GET2(cc, 1 + IMM2_SIZE);
  if (max == 0)
    return (*cc == OP_CRRANGE) ? 2 : 1;
  /* Only the optional part (max - min) is relevant here. */
  max -= min;
  if (max > 2)
    max = 2;
  return max;

  default:
  return 0;
  }
}
1611 
detect_repeat(compiler_common * common,PCRE2_SPTR begin)1612 static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
1613 {
1614 PCRE2_SPTR end = bracketend(begin);
1615 PCRE2_SPTR next;
1616 PCRE2_SPTR next_end;
1617 PCRE2_SPTR max_end;
1618 PCRE2_UCHAR type;
1619 sljit_sw length = end - begin;
1620 sljit_s32 min, max, i;
1621 
1622 /* Detect fixed iterations first. */
1623 if (end[-(1 + LINK_SIZE)] != OP_KET)
1624   return FALSE;
1625 
1626 /* Already detected repeat. */
1627 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
1628   return TRUE;
1629 
1630 next = end;
1631 min = 1;
1632 while (1)
1633   {
1634   if (*next != *begin)
1635     break;
1636   next_end = bracketend(next);
1637   if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1638     break;
1639   next = next_end;
1640   min++;
1641   }
1642 
1643 if (min == 2)
1644   return FALSE;
1645 
1646 max = 0;
1647 max_end = next;
1648 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1649   {
1650   type = *next;
1651   while (1)
1652     {
1653     if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1654       break;
1655     next_end = bracketend(next + 2 + LINK_SIZE);
1656     if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1657       break;
1658     next = next_end;
1659     max++;
1660     }
1661 
1662   if (next[0] == type && next[1] == *begin && max >= 1)
1663     {
1664     next_end = bracketend(next + 1);
1665     if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1666       {
1667       for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1668         if (*next_end != OP_KET)
1669           break;
1670 
1671       if (i == max)
1672         {
1673         common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1674         common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1675         /* +2 the original and the last. */
1676         common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1677         if (min == 1)
1678           return TRUE;
1679         min--;
1680         max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1681         }
1682       }
1683     }
1684   }
1685 
1686 if (min >= 3)
1687   {
1688   common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1689   common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1690   common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1691   return TRUE;
1692   }
1693 
1694 return FALSE;
1695 }
1696 
/* Groups of case labels for iterator opcodes. Each macro expands to a list
of 'case' labels and is intended to be placed directly inside a switch. */

/* Single character iterators (minimizing star / plus, and both kinds of
query), including the caseless and negated variants. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

/* Single character greedy star / plus iterators. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

/* Single character upto iterators (these carry an IMM2_SIZE count). */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* Character type counterparts of the three groups above. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1748 
/* Assigns private data offsets to the opcodes of the pattern.

The pattern is walked from common->start up to ccend. For every opcode that
needs private storage the current offset is stored in
common->private_data_ptrs[] at the index of the opcode, and the offset is
advanced by the required number of sljit_sw words. Processing stops early
once the offset exceeds SLJIT_MAX_LOCAL_SIZE.

Arguments:
  common              compiler state (start, private_data_ptrs[], utf)
  private_data_start  in: first free offset; out: first free offset after
                      all assignments
  ccend               end of the compiled pattern */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
PCRE2_SPTR cc = common->start;
PCRE2_SPTR alternative;
/* While cc < end, single character iterators get no private slot. */
PCRE2_SPTR end = NULL;
int private_data_ptr = *private_data_start;
int space, size, bracketlen;
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  /* space:      number of private words for a character iterator
     size:       code units to skip; a negative value means that -size units
                 are skipped followed by the UTF extra length of a character
     bracketlen: length of a bracket header to skip */
  space = 0;
  size = 0;
  bracketlen = 0;
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
    {
    if (detect_repeat(common, cc))
      {
      /* These brackets are converted to repeats, so no global
      based single character repeat is allowed. */
      if (cc >= end)
        end = bracketend(cc);
      }
    }
  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry after the KET was stored by detect_repeat(): the
    KET gets a slot and the recorded length (the extra copies) is skipped. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    /* No repeat detection for the bracket that follows. */
    size = 1;
    repeat_check = FALSE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    size = -2;
    space = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    size = -2;
    space = 2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    size = -(2 + IMM2_SIZE);
    space = 2;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    size = 1;
    space = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    size = 1;
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEUPTO:
    size = 1 + IMM2_SIZE;
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEMINUPTO:
    size = 1 + IMM2_SIZE;
    space = 2;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* The repeat opcode, if any, follows the 32 byte bitmap. */
    size = 1 + 32 / sizeof(PCRE2_UCHAR);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
     get a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  if (size != 0)
    {
    if (size < 0)
      {
      cc += -size;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    if (cc >= end)
      {
      /* Only a bracket that is not closed by a plain OP_KET starts a
      region where iterators get no private slot. */
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
*private_data_start = private_data_ptr;
}
1931 
1932 /* Returns with a frame_types (always < 0) if no need for frame. */
get_framesize(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,BOOL recursive,BOOL * needs_control_head)1933 static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
1934 {
1935 int length = 0;
1936 int possessive = 0;
1937 BOOL stack_restore = FALSE;
1938 BOOL setsom_found = recursive;
1939 BOOL setmark_found = recursive;
1940 /* The last capture is a local variable even for recursions. */
1941 BOOL capture_last_found = FALSE;
1942 
1943 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1944 SLJIT_ASSERT(common->control_head_ptr != 0);
1945 *needs_control_head = TRUE;
1946 #else
1947 *needs_control_head = FALSE;
1948 #endif
1949 
1950 if (ccend == NULL)
1951   {
1952   ccend = bracketend(cc) - (1 + LINK_SIZE);
1953   if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1954     {
1955     possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1956     /* This is correct regardless of common->capture_last_ptr. */
1957     capture_last_found = TRUE;
1958     }
1959   cc = next_opcode(common, cc);
1960   }
1961 
1962 SLJIT_ASSERT(cc != NULL);
1963 while (cc < ccend)
1964   switch(*cc)
1965     {
1966     case OP_SET_SOM:
1967     SLJIT_ASSERT(common->has_set_som);
1968     stack_restore = TRUE;
1969     if (!setsom_found)
1970       {
1971       length += 2;
1972       setsom_found = TRUE;
1973       }
1974     cc += 1;
1975     break;
1976 
1977     case OP_MARK:
1978     case OP_COMMIT_ARG:
1979     case OP_PRUNE_ARG:
1980     case OP_THEN_ARG:
1981     SLJIT_ASSERT(common->mark_ptr != 0);
1982     stack_restore = TRUE;
1983     if (!setmark_found)
1984       {
1985       length += 2;
1986       setmark_found = TRUE;
1987       }
1988     if (common->control_head_ptr != 0)
1989       *needs_control_head = TRUE;
1990     cc += 1 + 2 + cc[1];
1991     break;
1992 
1993     case OP_RECURSE:
1994     stack_restore = TRUE;
1995     if (common->has_set_som && !setsom_found)
1996       {
1997       length += 2;
1998       setsom_found = TRUE;
1999       }
2000     if (common->mark_ptr != 0 && !setmark_found)
2001       {
2002       length += 2;
2003       setmark_found = TRUE;
2004       }
2005     if (common->capture_last_ptr != 0 && !capture_last_found)
2006       {
2007       length += 2;
2008       capture_last_found = TRUE;
2009       }
2010     cc += 1 + LINK_SIZE;
2011     break;
2012 
2013     case OP_CBRA:
2014     case OP_CBRAPOS:
2015     case OP_SCBRA:
2016     case OP_SCBRAPOS:
2017     stack_restore = TRUE;
2018     if (common->capture_last_ptr != 0 && !capture_last_found)
2019       {
2020       length += 2;
2021       capture_last_found = TRUE;
2022       }
2023     length += 3;
2024     cc += 1 + LINK_SIZE + IMM2_SIZE;
2025     break;
2026 
2027     case OP_THEN:
2028     stack_restore = TRUE;
2029     if (common->control_head_ptr != 0)
2030       *needs_control_head = TRUE;
2031     cc ++;
2032     break;
2033 
2034     default:
2035     stack_restore = TRUE;
2036     /* Fall through. */
2037 
2038     case OP_NOT_WORD_BOUNDARY:
2039     case OP_WORD_BOUNDARY:
2040     case OP_NOT_DIGIT:
2041     case OP_DIGIT:
2042     case OP_NOT_WHITESPACE:
2043     case OP_WHITESPACE:
2044     case OP_NOT_WORDCHAR:
2045     case OP_WORDCHAR:
2046     case OP_ANY:
2047     case OP_ALLANY:
2048     case OP_ANYBYTE:
2049     case OP_NOTPROP:
2050     case OP_PROP:
2051     case OP_ANYNL:
2052     case OP_NOT_HSPACE:
2053     case OP_HSPACE:
2054     case OP_NOT_VSPACE:
2055     case OP_VSPACE:
2056     case OP_EXTUNI:
2057     case OP_EODN:
2058     case OP_EOD:
2059     case OP_CIRC:
2060     case OP_CIRCM:
2061     case OP_DOLL:
2062     case OP_DOLLM:
2063     case OP_CHAR:
2064     case OP_CHARI:
2065     case OP_NOT:
2066     case OP_NOTI:
2067 
2068     case OP_EXACT:
2069     case OP_POSSTAR:
2070     case OP_POSPLUS:
2071     case OP_POSQUERY:
2072     case OP_POSUPTO:
2073 
2074     case OP_EXACTI:
2075     case OP_POSSTARI:
2076     case OP_POSPLUSI:
2077     case OP_POSQUERYI:
2078     case OP_POSUPTOI:
2079 
2080     case OP_NOTEXACT:
2081     case OP_NOTPOSSTAR:
2082     case OP_NOTPOSPLUS:
2083     case OP_NOTPOSQUERY:
2084     case OP_NOTPOSUPTO:
2085 
2086     case OP_NOTEXACTI:
2087     case OP_NOTPOSSTARI:
2088     case OP_NOTPOSPLUSI:
2089     case OP_NOTPOSQUERYI:
2090     case OP_NOTPOSUPTOI:
2091 
2092     case OP_TYPEEXACT:
2093     case OP_TYPEPOSSTAR:
2094     case OP_TYPEPOSPLUS:
2095     case OP_TYPEPOSQUERY:
2096     case OP_TYPEPOSUPTO:
2097 
2098     case OP_CLASS:
2099     case OP_NCLASS:
2100     case OP_XCLASS:
2101 
2102     case OP_CALLOUT:
2103     case OP_CALLOUT_STR:
2104 
2105     cc = next_opcode(common, cc);
2106     SLJIT_ASSERT(cc != NULL);
2107     break;
2108     }
2109 
2110 /* Possessive quantifiers can use a special case. */
2111 if (SLJIT_UNLIKELY(possessive == length))
2112   return stack_restore ? no_frame : no_stack;
2113 
2114 if (length > 0)
2115   return length + 1;
2116 return stack_restore ? no_frame : no_stack;
2117 }
2118 
init_frame(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,int stackpos,int stacktop)2119 static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
2120 {
2121 DEFINE_COMPILER;
2122 BOOL setsom_found = FALSE;
2123 BOOL setmark_found = FALSE;
2124 /* The last capture is a local variable even for recursions. */
2125 BOOL capture_last_found = FALSE;
2126 int offset;
2127 
2128 /* >= 1 + shortest item size (2) */
2129 SLJIT_UNUSED_ARG(stacktop);
2130 SLJIT_ASSERT(stackpos >= stacktop + 2);
2131 
2132 stackpos = STACK(stackpos);
2133 if (ccend == NULL)
2134   {
2135   ccend = bracketend(cc) - (1 + LINK_SIZE);
2136   if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
2137     cc = next_opcode(common, cc);
2138   }
2139 
2140 SLJIT_ASSERT(cc != NULL);
2141 while (cc < ccend)
2142   switch(*cc)
2143     {
2144     case OP_SET_SOM:
2145     SLJIT_ASSERT(common->has_set_som);
2146     if (!setsom_found)
2147       {
2148       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2149       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2150       stackpos -= (int)sizeof(sljit_sw);
2151       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2152       stackpos -= (int)sizeof(sljit_sw);
2153       setsom_found = TRUE;
2154       }
2155     cc += 1;
2156     break;
2157 
2158     case OP_MARK:
2159     case OP_COMMIT_ARG:
2160     case OP_PRUNE_ARG:
2161     case OP_THEN_ARG:
2162     SLJIT_ASSERT(common->mark_ptr != 0);
2163     if (!setmark_found)
2164       {
2165       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2166       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2167       stackpos -= (int)sizeof(sljit_sw);
2168       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2169       stackpos -= (int)sizeof(sljit_sw);
2170       setmark_found = TRUE;
2171       }
2172     cc += 1 + 2 + cc[1];
2173     break;
2174 
2175     case OP_RECURSE:
2176     if (common->has_set_som && !setsom_found)
2177       {
2178       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2179       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2180       stackpos -= (int)sizeof(sljit_sw);
2181       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2182       stackpos -= (int)sizeof(sljit_sw);
2183       setsom_found = TRUE;
2184       }
2185     if (common->mark_ptr != 0 && !setmark_found)
2186       {
2187       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2188       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2189       stackpos -= (int)sizeof(sljit_sw);
2190       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2191       stackpos -= (int)sizeof(sljit_sw);
2192       setmark_found = TRUE;
2193       }
2194     if (common->capture_last_ptr != 0 && !capture_last_found)
2195       {
2196       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2197       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2198       stackpos -= (int)sizeof(sljit_sw);
2199       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2200       stackpos -= (int)sizeof(sljit_sw);
2201       capture_last_found = TRUE;
2202       }
2203     cc += 1 + LINK_SIZE;
2204     break;
2205 
2206     case OP_CBRA:
2207     case OP_CBRAPOS:
2208     case OP_SCBRA:
2209     case OP_SCBRAPOS:
2210     if (common->capture_last_ptr != 0 && !capture_last_found)
2211       {
2212       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2213       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2214       stackpos -= (int)sizeof(sljit_sw);
2215       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2216       stackpos -= (int)sizeof(sljit_sw);
2217       capture_last_found = TRUE;
2218       }
2219     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2220     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
2221     stackpos -= (int)sizeof(sljit_sw);
2222     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
2223     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
2224     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2225     stackpos -= (int)sizeof(sljit_sw);
2226     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
2227     stackpos -= (int)sizeof(sljit_sw);
2228 
2229     cc += 1 + LINK_SIZE + IMM2_SIZE;
2230     break;
2231 
2232     default:
2233     cc = next_opcode(common, cc);
2234     SLJIT_ASSERT(cc != NULL);
2235     break;
2236     }
2237 
2238 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
2239 SLJIT_ASSERT(stackpos == STACK(stacktop));
2240 }
2241 
/* Number of temporary registers rotated by the delayed memory copy helpers. */
#define RECURSE_TMP_REG_COUNT 3

/* State of a sequence of delayed memory-to-memory copies. Each slot of the
arrays belongs to one temporary register. */
struct delayed_mem_copy_status {
  /* Compiler which receives the generated instructions. */
  struct sljit_compiler *compiler;
  /* Base register of the pending store of each slot, -1 if the slot has
  not been used yet. */
  int store_bases[RECURSE_TMP_REG_COUNT];
  /* Offset of the pending store of each slot. */
  int store_offsets[RECURSE_TMP_REG_COUNT];
  /* Register which carries the value of each slot. */
  int tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Register which preserves the original content of tmp_regs[] when
  that cannot be overwritten freely. */
  int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Index of the slot used by the next copy. */
  int next_tmp_reg;
};

typedef struct delayed_mem_copy_status delayed_mem_copy_status;
2252 
/* Prepares 'status' for a new sequence of delayed copies. The caller must
have filled tmp_regs[] and saved_tmp_regs[] already; every slot is marked
as having no pending store, and the rotation restarts at slot 0. */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
int slot = 0;

status->compiler = common->compiler;
status->next_tmp_reg = 0;

while (slot < RECURSE_TMP_REG_COUNT)
  {
  SLJIT_ASSERT(status->tmp_regs[slot] >= 0);
  /* A separate save register is only allowed when it has no register index. */
  SLJIT_ASSERT(sljit_get_register_index(status->saved_tmp_regs[slot]) < 0 || status->tmp_regs[slot] == status->saved_tmp_regs[slot]);

  status->store_bases[slot] = -1;
  slot++;
  }
}
2267 
/* Schedules a copy of one machine word from [load_base + load_offset] to
[store_base + store_offset]. The load is emitted immediately into the next
temporary register of the rotation; the store is only recorded, and it is
emitted when the same slot is reused or by delayed_mem_copy_finish(). */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_sw store_offset)
{
/* The OP1 macro refers to a local variable called 'compiler'. */
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int reg = status->tmp_regs[slot];
int pending_base = status->store_bases[slot];

SLJIT_ASSERT(load_base > 0 && store_base > 0);

if (pending_base != -1)
  {
  /* The register still holds an unwritten value: flush it first. */
  OP1(SLJIT_MOV, SLJIT_MEM1(pending_base), status->store_offsets[slot], reg, 0);
  }
else if (sljit_get_register_index(status->saved_tmp_regs[slot]) < 0)
  {
  /* First use of the slot: preserve virtual registers. */
  OP1(SLJIT_MOV, status->saved_tmp_regs[slot], 0, reg, 0);
  }

OP1(SLJIT_MOV, reg, 0, SLJIT_MEM1(load_base), load_offset);
status->store_bases[slot] = store_base;
status->store_offsets[slot] = store_offset;

slot++;
status->next_tmp_reg = (slot == RECURSE_TMP_REG_COUNT) ? 0 : slot;
}
2292 
/* Emits all stores which are still pending in 'status'. The slots are
visited in rotation order, starting with the one which would be reused
next. After a store, the temporary register is reloaded from its save
register when the latter has no register index. */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
/* The OP1 macro refers to a local variable called 'compiler'. */
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int remaining, reg, saved_reg;

for (remaining = RECURSE_TMP_REG_COUNT; remaining > 0;
     remaining--, slot = (slot + 1) % RECURSE_TMP_REG_COUNT)
  {
  if (status->store_bases[slot] == -1)
    continue;

  reg = status->tmp_regs[slot];
  saved_reg = status->saved_tmp_regs[slot];

  OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], reg, 0);

  /* Restore virtual registers. */
  if (sljit_get_register_index(saved_reg) < 0)
    OP1(SLJIT_MOV, reg, 0, saved_reg, 0);
  }
}
2316 
2317 #undef RECURSE_TMP_REG_COUNT
2318 
/* Computes the number of machine words needed to save the data of the
opcodes in [cc, ccend) around a recursion.

The count starts at one and grows by the private data words of the
brackets and iterators, by the ovector words of the capturing brackets,
and by one word each for the control head, the last capture index, and -
only when a verb which sets *has_quit is present - the start of match and
the mark pointer.

Outputs:
  *needs_control_head  a control head word was counted
  *has_quit            a (*PRUNE), (*SKIP), (*COMMIT) or (*THEN) verb
                       (with or without argument) was found
  *has_accept          an ACCEPT opcode was found
Returns the number of words. */
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
  BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept)
{
PCRE2_SPTR alternative;
int words = 1;
int class_size;
BOOL uses_quit = FALSE;
BOOL uses_accept = FALSE;
BOOL uses_som = FALSE;
BOOL uses_mark = FALSE;
BOOL uses_capture_last = FALSE;
BOOL uses_control_head = FALSE;

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
uses_control_head = TRUE;
#endif

/* Calculate the sum of the private machine words. */
while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    uses_som = TRUE;
    cc += 1;
    break;

    case OP_RECURSE:
    /* A nested recursion may touch every local which is in use. */
    if (common->has_set_som)
      uses_som = TRUE;
    if (common->mark_ptr != 0)
      uses_mark = TRUE;
    if (common->capture_last_ptr != 0)
      uses_capture_last = TRUE;
    cc += 1 + LINK_SIZE;
    break;

    case OP_KET:
    if (PRIVATE_DATA(cc) != 0)
      {
      words += 1;
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    words += 1;
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    /* Two ovector words, plus one private word unless optimized away. */
    words += 2;
    if (common->capture_last_ptr != 0)
      uses_capture_last = TRUE;
    if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
      words += 1;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    /* Two ovector words and two private words. */
    words += 2 + 2;
    if (common->capture_last_ptr != 0)
      uses_capture_last = TRUE;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      words += 1;
    cc += 1 + LINK_SIZE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc) != 0)
      words += 1;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc) != 0)
      words += 1;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    class_size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
    class_size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif
    /* The iterator which follows the class decides the word count. */
    if (PRIVATE_DATA(cc) != 0)
      words += get_class_iterator_size(cc + class_size);
    cc += class_size;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    uses_mark = TRUE;
    if (common->control_head_ptr != 0)
      uses_control_head = TRUE;
    if (*cc != OP_MARK)
      uses_quit = TRUE;

    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_SKIP:
    case OP_COMMIT:
    uses_quit = TRUE;
    cc++;
    break;

    case OP_SKIP_ARG:
    uses_quit = TRUE;
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    uses_quit = TRUE;
    uses_control_head = TRUE;
    cc++;
    break;

    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    uses_accept = TRUE;
    cc++;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }
SLJIT_ASSERT(cc == ccend);

/* One word for each shared local which was seen. */
if (uses_control_head)
  words += 1;
if (uses_capture_last)
  words += 1;
/* Start of match and mark are only counted when a quit verb is present. */
if (uses_quit)
  {
  if (uses_som)
    words += 1;
  if (uses_mark)
    words += 1;
  }

*needs_control_head = uses_control_head;
*has_quit = uses_quit;
*has_accept = uses_accept;
return words;
}
2536 
/* Copy modes accepted by copy_recurse_data() as its 'type' argument. */
enum copy_recurse_data_types {
  /* Store the local variables into the stack area. */
  recurse_copy_from_global = 0,
  /* Load only the private items back from the stack area. */
  recurse_copy_private_to_global = 1,
  /* Load the shared and kept shared items back from the stack area. */
  recurse_copy_shared_to_global = 2,
  /* Load only the kept shared items back from the stack area. */
  recurse_copy_kept_shared_to_global = 3,
  /* Exchange the private and shared items with the stack area. */
  recurse_swap_global = 4
};
2544 
copy_recurse_data(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,int type,int stackptr,int stacktop,BOOL has_quit)2545 static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
2546   int type, int stackptr, int stacktop, BOOL has_quit)
2547 {
2548 delayed_mem_copy_status status;
2549 PCRE2_SPTR alternative;
2550 sljit_sw private_srcw[2];
2551 sljit_sw shared_srcw[3];
2552 sljit_sw kept_shared_srcw[2];
2553 int private_count, shared_count, kept_shared_count;
2554 int from_sp, base_reg, offset, i;
2555 BOOL setsom_found = FALSE;
2556 BOOL setmark_found = FALSE;
2557 BOOL capture_last_found = FALSE;
2558 BOOL control_head_found = FALSE;
2559 
2560 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2561 SLJIT_ASSERT(common->control_head_ptr != 0);
2562 control_head_found = TRUE;
2563 #endif
2564 
2565 switch (type)
2566   {
2567   case recurse_copy_from_global:
2568   from_sp = TRUE;
2569   base_reg = STACK_TOP;
2570   break;
2571 
2572   case recurse_copy_private_to_global:
2573   case recurse_copy_shared_to_global:
2574   case recurse_copy_kept_shared_to_global:
2575   from_sp = FALSE;
2576   base_reg = STACK_TOP;
2577   break;
2578 
2579   default:
2580   SLJIT_ASSERT(type == recurse_swap_global);
2581   from_sp = FALSE;
2582   base_reg = TMP2;
2583   break;
2584   }
2585 
2586 stackptr = STACK(stackptr);
2587 stacktop = STACK(stacktop);
2588 
2589 status.tmp_regs[0] = TMP1;
2590 status.saved_tmp_regs[0] = TMP1;
2591 
2592 if (base_reg != TMP2)
2593   {
2594   status.tmp_regs[1] = TMP2;
2595   status.saved_tmp_regs[1] = TMP2;
2596   }
2597 else
2598   {
2599   status.saved_tmp_regs[1] = RETURN_ADDR;
2600   if (HAS_VIRTUAL_REGISTERS)
2601     status.tmp_regs[1] = STR_PTR;
2602   else
2603     status.tmp_regs[1] = RETURN_ADDR;
2604   }
2605 
2606 status.saved_tmp_regs[2] = TMP3;
2607 if (HAS_VIRTUAL_REGISTERS)
2608   status.tmp_regs[2] = STR_END;
2609 else
2610   status.tmp_regs[2] = TMP3;
2611 
2612 delayed_mem_copy_init(&status, common);
2613 
2614 if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2615   {
2616   SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2617 
2618   if (!from_sp)
2619     delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);
2620 
2621   if (from_sp || type == recurse_swap_global)
2622     delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
2623   }
2624 
2625 stackptr += sizeof(sljit_sw);
2626 
2627 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2628 if (type != recurse_copy_shared_to_global)
2629   {
2630   if (!from_sp)
2631     delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);
2632 
2633   if (from_sp || type == recurse_swap_global)
2634     delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
2635   }
2636 
2637 stackptr += sizeof(sljit_sw);
2638 #endif
2639 
2640 while (cc < ccend)
2641   {
2642   private_count = 0;
2643   shared_count = 0;
2644   kept_shared_count = 0;
2645 
2646   switch(*cc)
2647     {
2648     case OP_SET_SOM:
2649     SLJIT_ASSERT(common->has_set_som);
2650     if (has_quit && !setsom_found)
2651       {
2652       kept_shared_srcw[0] = OVECTOR(0);
2653       kept_shared_count = 1;
2654       setsom_found = TRUE;
2655       }
2656     cc += 1;
2657     break;
2658 
2659     case OP_RECURSE:
2660     if (has_quit)
2661       {
2662       if (common->has_set_som && !setsom_found)
2663         {
2664         kept_shared_srcw[0] = OVECTOR(0);
2665         kept_shared_count = 1;
2666         setsom_found = TRUE;
2667         }
2668       if (common->mark_ptr != 0 && !setmark_found)
2669         {
2670         kept_shared_srcw[kept_shared_count] = common->mark_ptr;
2671         kept_shared_count++;
2672         setmark_found = TRUE;
2673         }
2674       }
2675     if (common->capture_last_ptr != 0 && !capture_last_found)
2676       {
2677       shared_srcw[0] = common->capture_last_ptr;
2678       shared_count = 1;
2679       capture_last_found = TRUE;
2680       }
2681     cc += 1 + LINK_SIZE;
2682     break;
2683 
2684     case OP_KET:
2685     if (PRIVATE_DATA(cc) != 0)
2686       {
2687       private_count = 1;
2688       private_srcw[0] = PRIVATE_DATA(cc);
2689       SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2690       cc += PRIVATE_DATA(cc + 1);
2691       }
2692     cc += 1 + LINK_SIZE;
2693     break;
2694 
2695     case OP_ASSERT:
2696     case OP_ASSERT_NOT:
2697     case OP_ASSERTBACK:
2698     case OP_ASSERTBACK_NOT:
2699     case OP_ASSERT_NA:
2700     case OP_ASSERTBACK_NA:
2701     case OP_ONCE:
2702     case OP_SCRIPT_RUN:
2703     case OP_BRAPOS:
2704     case OP_SBRA:
2705     case OP_SBRAPOS:
2706     case OP_SCOND:
2707     private_count = 1;
2708     private_srcw[0] = PRIVATE_DATA(cc);
2709     cc += 1 + LINK_SIZE;
2710     break;
2711 
2712     case OP_CBRA:
2713     case OP_SCBRA:
2714     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2715     shared_srcw[0] = OVECTOR(offset);
2716     shared_srcw[1] = OVECTOR(offset + 1);
2717     shared_count = 2;
2718 
2719     if (common->capture_last_ptr != 0 && !capture_last_found)
2720       {
2721       shared_srcw[2] = common->capture_last_ptr;
2722       shared_count = 3;
2723       capture_last_found = TRUE;
2724       }
2725 
2726     if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
2727       {
2728       private_count = 1;
2729       private_srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
2730       }
2731     cc += 1 + LINK_SIZE + IMM2_SIZE;
2732     break;
2733 
2734     case OP_CBRAPOS:
2735     case OP_SCBRAPOS:
2736     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2737     shared_srcw[0] = OVECTOR(offset);
2738     shared_srcw[1] = OVECTOR(offset + 1);
2739     shared_count = 2;
2740 
2741     if (common->capture_last_ptr != 0 && !capture_last_found)
2742       {
2743       shared_srcw[2] = common->capture_last_ptr;
2744       shared_count = 3;
2745       capture_last_found = TRUE;
2746       }
2747 
2748     private_count = 2;
2749     private_srcw[0] = PRIVATE_DATA(cc);
2750     private_srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
2751     cc += 1 + LINK_SIZE + IMM2_SIZE;
2752     break;
2753 
2754     case OP_COND:
2755     /* Might be a hidden SCOND. */
2756     alternative = cc + GET(cc, 1);
2757     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2758       {
2759       private_count = 1;
2760       private_srcw[0] = PRIVATE_DATA(cc);
2761       }
2762     cc += 1 + LINK_SIZE;
2763     break;
2764 
2765     CASE_ITERATOR_PRIVATE_DATA_1
2766     if (PRIVATE_DATA(cc))
2767       {
2768       private_count = 1;
2769       private_srcw[0] = PRIVATE_DATA(cc);
2770       }
2771     cc += 2;
2772 #ifdef SUPPORT_UNICODE
2773     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2774 #endif
2775     break;
2776 
2777     CASE_ITERATOR_PRIVATE_DATA_2A
2778     if (PRIVATE_DATA(cc))
2779       {
2780       private_count = 2;
2781       private_srcw[0] = PRIVATE_DATA(cc);
2782       private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2783       }
2784     cc += 2;
2785 #ifdef SUPPORT_UNICODE
2786     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2787 #endif
2788     break;
2789 
2790     CASE_ITERATOR_PRIVATE_DATA_2B
2791     if (PRIVATE_DATA(cc))
2792       {
2793       private_count = 2;
2794       private_srcw[0] = PRIVATE_DATA(cc);
2795       private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2796       }
2797     cc += 2 + IMM2_SIZE;
2798 #ifdef SUPPORT_UNICODE
2799     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2800 #endif
2801     break;
2802 
2803     CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2804     if (PRIVATE_DATA(cc))
2805       {
2806       private_count = 1;
2807       private_srcw[0] = PRIVATE_DATA(cc);
2808       }
2809     cc += 1;
2810     break;
2811 
2812     CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2813     if (PRIVATE_DATA(cc))
2814       {
2815       private_count = 2;
2816       private_srcw[0] = PRIVATE_DATA(cc);
2817       private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2818       }
2819     cc += 1;
2820     break;
2821 
2822     CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2823     if (PRIVATE_DATA(cc))
2824       {
2825       private_count = 2;
2826       private_srcw[0] = PRIVATE_DATA(cc);
2827       private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2828       }
2829     cc += 1 + IMM2_SIZE;
2830     break;
2831 
2832     case OP_CLASS:
2833     case OP_NCLASS:
2834 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
2835     case OP_XCLASS:
2836     i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2837 #else
2838     i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2839 #endif
2840     if (PRIVATE_DATA(cc) != 0)
2841       switch(get_class_iterator_size(cc + i))
2842         {
2843         case 1:
2844         private_count = 1;
2845         private_srcw[0] = PRIVATE_DATA(cc);
2846         break;
2847 
2848         case 2:
2849         private_count = 2;
2850         private_srcw[0] = PRIVATE_DATA(cc);
2851         private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2852         break;
2853 
2854         default:
2855         SLJIT_UNREACHABLE();
2856         break;
2857         }
2858     cc += i;
2859     break;
2860 
2861     case OP_MARK:
2862     case OP_COMMIT_ARG:
2863     case OP_PRUNE_ARG:
2864     case OP_THEN_ARG:
2865     SLJIT_ASSERT(common->mark_ptr != 0);
2866     if (has_quit && !setmark_found)
2867       {
2868       kept_shared_srcw[0] = common->mark_ptr;
2869       kept_shared_count = 1;
2870       setmark_found = TRUE;
2871       }
2872     if (common->control_head_ptr != 0 && !control_head_found)
2873       {
2874       private_srcw[0] = common->control_head_ptr;
2875       private_count = 1;
2876       control_head_found = TRUE;
2877       }
2878     cc += 1 + 2 + cc[1];
2879     break;
2880 
2881     case OP_THEN:
2882     SLJIT_ASSERT(common->control_head_ptr != 0);
2883     if (!control_head_found)
2884       {
2885       private_srcw[0] = common->control_head_ptr;
2886       private_count = 1;
2887       control_head_found = TRUE;
2888       }
2889     cc++;
2890     break;
2891 
2892     default:
2893     cc = next_opcode(common, cc);
2894     SLJIT_ASSERT(cc != NULL);
2895     break;
2896     }
2897 
2898   if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2899     {
2900     SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2901 
2902     for (i = 0; i < private_count; i++)
2903       {
2904       SLJIT_ASSERT(private_srcw[i] != 0);
2905 
2906       if (!from_sp)
2907         delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);
2908 
2909       if (from_sp || type == recurse_swap_global)
2910         delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);
2911 
2912       stackptr += sizeof(sljit_sw);
2913       }
2914     }
2915   else
2916     stackptr += sizeof(sljit_sw) * private_count;
2917 
2918   if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
2919     {
2920     SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
2921 
2922     for (i = 0; i < shared_count; i++)
2923       {
2924       SLJIT_ASSERT(shared_srcw[i] != 0);
2925 
2926       if (!from_sp)
2927         delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);
2928 
2929       if (from_sp || type == recurse_swap_global)
2930         delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);
2931 
2932       stackptr += sizeof(sljit_sw);
2933       }
2934     }
2935   else
2936     stackptr += sizeof(sljit_sw) * shared_count;
2937 
2938   if (type != recurse_copy_private_to_global && type != recurse_swap_global)
2939     {
2940     SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
2941 
2942     for (i = 0; i < kept_shared_count; i++)
2943       {
2944       SLJIT_ASSERT(kept_shared_srcw[i] != 0);
2945 
2946       if (!from_sp)
2947         delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);
2948 
2949       if (from_sp || type == recurse_swap_global)
2950         delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);
2951 
2952       stackptr += sizeof(sljit_sw);
2953       }
2954     }
2955   else
2956     stackptr += sizeof(sljit_sw) * kept_shared_count;
2957   }
2958 
2959 SLJIT_ASSERT(cc == ccend && stackptr == stacktop);
2960 
2961 delayed_mem_copy_finish(&status);
2962 }
2963 
set_then_offsets(compiler_common * common,PCRE2_SPTR cc,sljit_u8 * current_offset)2964 static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
2965 {
2966 PCRE2_SPTR end = bracketend(cc);
2967 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2968 
2969 /* Assert captures then. */
2970 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA)
2971   current_offset = NULL;
2972 /* Conditional block does not. */
2973 if (*cc == OP_COND || *cc == OP_SCOND)
2974   has_alternatives = FALSE;
2975 
2976 cc = next_opcode(common, cc);
2977 if (has_alternatives)
2978   current_offset = common->then_offsets + (cc - common->start);
2979 
2980 while (cc < end)
2981   {
2982   if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2983     cc = set_then_offsets(common, cc, current_offset);
2984   else
2985     {
2986     if (*cc == OP_ALT && has_alternatives)
2987       current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2988     if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2989       *current_offset = 1;
2990     cc = next_opcode(common, cc);
2991     }
2992   }
2993 
2994 return end;
2995 }
2996 
2997 #undef CASE_ITERATOR_PRIVATE_DATA_1
2998 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2999 #undef CASE_ITERATOR_PRIVATE_DATA_2B
3000 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
3001 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
3002 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3003 
is_powerof2(unsigned int value)3004 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
3005 {
3006 return (value & (value - 1)) == 0;
3007 }
3008 
set_jumps(jump_list * list,struct sljit_label * label)3009 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
3010 {
3011 while (list)
3012   {
3013   /* sljit_set_label is clever enough to do nothing
3014   if either the jump or the label is NULL. */
3015   SET_LABEL(list->jump, label);
3016   list = list->next;
3017   }
3018 }
3019 
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)3020 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
3021 {
3022 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
3023 if (list_item)
3024   {
3025   list_item->next = *list;
3026   list_item->jump = jump;
3027   *list = list_item;
3028   }
3029 }
3030 
/* Registers a stub for the 'start' jump: a label is created at the current
position, and the (start, label) pair is inserted at the head of
common->stubs. Nothing is recorded, and no label is created, when the
memory allocation fails. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (stub == NULL)
  return;

stub->start = start;
/* The stub code continues from here when it is finished. */
stub->quit = LABEL();
stub->next = common->stubs;
common->stubs = stub;
}
3044 
/* Emits the code of every stub recorded in common->stubs, then empties the
list. The code of a stub starts where its 'start' jump lands, performs a
fast call whose jump is collected in common->stackalloc, and jumps back to
the 'quit' label of the stub. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;
struct sljit_jump *stackalloc_call;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  stackalloc_call = JUMP(SLJIT_FAST_CALL);
  add_jump(compiler, &common->stackalloc, stackalloc_call);
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
3059 
count_match(compiler_common * common)3060 static SLJIT_INLINE void count_match(compiler_common *common)
3061 {
3062 DEFINE_COMPILER;
3063 
3064 OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
3065 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
3066 }
3067 
allocate_stack(compiler_common * common,int size)3068 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
3069 {
3070 /* May destroy all locals and registers except TMP2. */
3071 DEFINE_COMPILER;
3072 
3073 SLJIT_ASSERT(size > 0);
3074 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
3075 #ifdef DESTROY_REGISTERS
3076 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
3077 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3078 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3079 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
3080 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
3081 #endif
3082 add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
3083 }
3084 
free_stack(compiler_common * common,int size)3085 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
3086 {
3087 DEFINE_COMPILER;
3088 
3089 SLJIT_ASSERT(size > 0);
3090 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
3091 }
3092 
/* Allocates a block of 'size' bytes for read only data.

The block is preceded by a header of one sljit_uw, which links it into the
list starting at common->read_only_data_head. The returned pointer refers
to the first word after the header.

Returns NULL when the compiler is already in an error state, or when the
allocation fails; in the latter case the memory error of the compiler is
set. A 'size' which is too large to be extended by the header is handled
as an allocation failure, instead of letting the sum wrap around to a
small value. */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *result;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

/* size + sizeof(sljit_uw) must not wrap around. */
if (SLJIT_UNLIKELY(size > ~(sljit_uw)0 - sizeof(sljit_uw)))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
if (SLJIT_UNLIKELY(result == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

/* The header stores the previous head of the list. */
*(void**)result = common->read_only_data_head;
common->read_only_data_head = (void *)result;
return result + 1;
}
3112 
reset_ovector(compiler_common * common,int length)3113 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
3114 {
3115 DEFINE_COMPILER;
3116 struct sljit_label *loop;
3117 sljit_s32 i;
3118 
3119 /* At this point we can freely use all temporary registers. */
3120 SLJIT_ASSERT(length > 1);
3121 /* TMP1 returns with begin - 1. */
3122 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
3123 if (length < 8)
3124   {
3125   for (i = 1; i < length; i++)
3126     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
3127   }
3128 else
3129   {
3130   if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3131     {
3132     GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
3133     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3134     loop = LABEL();
3135     sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
3136     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3137     JUMPTO(SLJIT_NOT_ZERO, loop);
3138     }
3139   else
3140     {
3141     GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
3142     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3143     loop = LABEL();
3144     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
3145     OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
3146     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3147     JUMPTO(SLJIT_NOT_ZERO, loop);
3148     }
3149   }
3150 }
3151 
reset_early_fail(compiler_common * common)3152 static SLJIT_INLINE void reset_early_fail(compiler_common *common)
3153 {
3154 DEFINE_COMPILER;
3155 sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
3156 sljit_u32 uncleared_size;
3157 sljit_s32 src = SLJIT_IMM;
3158 sljit_s32 i;
3159 struct sljit_label *loop;
3160 
3161 SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);
3162 
3163 if (size == sizeof(sljit_sw))
3164   {
3165   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
3166   return;
3167   }
3168 
3169 if (sljit_get_register_index(TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
3170   {
3171   OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
3172   src = TMP3;
3173   }
3174 
3175 if (size <= 6 * sizeof(sljit_sw))
3176   {
3177   for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw))
3178     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0);
3179   return;
3180   }
3181 
3182 GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);
3183 
3184 uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);
3185 
3186 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);
3187 
3188 loop = LABEL();
3189 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3190 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3191 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * (sljit_sw)sizeof(sljit_sw), src, 0);
3192 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * (sljit_sw)sizeof(sljit_sw), src, 0);
3193 CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);
3194 
3195 if (uncleared_size >= sizeof(sljit_sw))
3196   OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3197 
3198 if (uncleared_size >= 2 * sizeof(sljit_sw))
3199   OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0);
3200 }
3201 
do_reset_match(compiler_common * common,int length)3202 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
3203 {
3204 DEFINE_COMPILER;
3205 struct sljit_label *loop;
3206 int i;
3207 
3208 SLJIT_ASSERT(length > 1);
3209 /* OVECTOR(1) contains the "string begin - 1" constant. */
3210 if (length > 2)
3211   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3212 if (length < 8)
3213   {
3214   for (i = 2; i < length; i++)
3215     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
3216   }
3217 else
3218   {
3219   if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3220     {
3221     GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
3222     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3223     loop = LABEL();
3224     sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
3225     OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3226     JUMPTO(SLJIT_NOT_ZERO, loop);
3227     }
3228   else
3229     {
3230     GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
3231     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3232     loop = LABEL();
3233     OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
3234     OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
3235     OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3236     JUMPTO(SLJIT_NOT_ZERO, loop);
3237     }
3238   }
3239 
3240 if (!HAS_VIRTUAL_REGISTERS)
3241   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));
3242 else
3243   OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
3244 
3245 if (common->mark_ptr != 0)
3246   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
3247 if (common->control_head_ptr != 0)
3248   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
3249 if (HAS_VIRTUAL_REGISTERS)
3250   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
3251 
3252 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3253 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
3254 }
3255 
do_search_mark(sljit_sw * current,PCRE2_SPTR skip_arg)3256 static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
3257 {
3258 while (current != NULL)
3259   {
3260   switch (current[1])
3261     {
3262     case type_then_trap:
3263     break;
3264 
3265     case type_mark:
3266     if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
3267       return current[3];
3268     break;
3269 
3270     default:
3271     SLJIT_UNREACHABLE();
3272     break;
3273     }
3274   SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
3275   current = (sljit_sw*)current[0];
3276   }
3277 return 0;
3278 }
3279 
copy_ovector(compiler_common * common,int topbracket)3280 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
3281 {
3282 DEFINE_COMPILER;
3283 struct sljit_label *loop;
3284 BOOL has_pre;
3285 
3286 /* At this point we can freely use all registers. */
3287 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3288 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
3289 
3290 if (HAS_VIRTUAL_REGISTERS)
3291   {
3292   OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
3293   OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3294   if (common->mark_ptr != 0)
3295     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
3296   OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
3297   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
3298   if (common->mark_ptr != 0)
3299     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
3300   OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
3301     SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
3302   }
3303 else
3304   {
3305   OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3306   OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
3307   if (common->mark_ptr != 0)
3308     OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
3309   OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
3310   OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
3311   if (common->mark_ptr != 0)
3312     OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
3313   OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
3314   }
3315 
3316 has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
3317 
3318 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
3319 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
3320 
3321 loop = LABEL();
3322 
3323 if (has_pre)
3324   sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
3325 else
3326   {
3327   OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
3328   OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
3329   }
3330 
3331 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
3332 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
3333 /* Copy the integer value to the output buffer */
3334 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3335 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
3336 #endif
3337 
3338 SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
3339 OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
3340 
3341 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3342 JUMPTO(SLJIT_NOT_ZERO, loop);
3343 
3344 /* Calculate the return value, which is the maximum ovector value. */
3345 if (topbracket > 1)
3346   {
3347   if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
3348     {
3349     GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
3350     OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3351 
3352     /* OVECTOR(0) is never equal to SLJIT_S2. */
3353     loop = LABEL();
3354     sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
3355     OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3356     CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3357     OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3358     }
3359   else
3360     {
3361     GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
3362     OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3363 
3364     /* OVECTOR(0) is never equal to SLJIT_S2. */
3365     loop = LABEL();
3366     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
3367     OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
3368     OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3369     CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3370     OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3371     }
3372   }
3373 else
3374   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
3375 }
3376 
return_with_partial_match(compiler_common * common,struct sljit_label * quit)3377 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
3378 {
3379 DEFINE_COMPILER;
3380 sljit_s32 mov_opcode;
3381 sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1;
3382 
3383 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
3384 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
3385   && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));
3386 
3387 if (arguments_reg != ARGUMENTS)
3388   OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);
3389 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
3390   common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
3391 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);
3392 
3393 /* Store match begin and end. */
3394 OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin));
3395 OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
3396 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));
3397 
3398 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
3399 
3400 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
3401 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3402 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
3403 #endif
3404 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);
3405 
3406 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
3407 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3408 OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
3409 #endif
3410 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);
3411 
3412 JUMPTO(SLJIT_JUMP, quit);
3413 }
3414 
check_start_used_ptr(compiler_common * common)3415 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
3416 {
3417 /* May destroy TMP1. */
3418 DEFINE_COMPILER;
3419 struct sljit_jump *jump;
3420 
3421 if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3422   {
3423   /* The value of -1 must be kept for start_used_ptr! */
3424   OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
3425   /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
3426   is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
3427   jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
3428   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3429   JUMPHERE(jump);
3430   }
3431 else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
3432   {
3433   jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3434   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3435   JUMPHERE(jump);
3436   }
3437 }
3438 
char_has_othercase(compiler_common * common,PCRE2_SPTR cc)3439 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
3440 {
3441 /* Detects if the character has an othercase. */
3442 unsigned int c;
3443 
3444 #ifdef SUPPORT_UNICODE
3445 if (common->utf || common->ucp)
3446   {
3447   if (common->utf)
3448     {
3449     GETCHAR(c, cc);
3450     }
3451   else
3452     c = *cc;
3453 
3454   if (c > 127)
3455     return c != UCD_OTHERCASE(c);
3456 
3457   return common->fcc[c] != c;
3458   }
3459 else
3460 #endif
3461   c = *cc;
3462 return MAX_255(c) ? common->fcc[c] != c : FALSE;
3463 }
3464 
char_othercase(compiler_common * common,unsigned int c)3465 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
3466 {
3467 /* Returns with the othercase. */
3468 #ifdef SUPPORT_UNICODE
3469 if ((common->utf || common->ucp) && c > 127)
3470   return UCD_OTHERCASE(c);
3471 #endif
3472 return TABLE_GET(c, common->fcc, c);
3473 }
3474 
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
/* Detects whether the character at cc and its othercase differ in exactly
one bit. The caller must pass a character that has a different othercase.

Returns 0 when more than one bit differs. Otherwise the result is
(position << 8) | mask, where mask is the differing bit moved into the low
8 bits and position locates the unit that contains it. In the 8 bit
library position is the index of the code unit within the UTF-8 sequence.
NOTE(review): in the 16 / 32 bit libraries position looks like a byte
index (0 / 1 for the low / high byte, 2 / 3 for a second code unit) -
confirm against the caller. */
unsigned int c, oc, diff;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int unit;
#endif

#ifdef SUPPORT_UNICODE
if (common->utf || common->ucp)
  {
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
    c = *cc;

  oc = (c <= 127) ? common->fcc[c] : UCD_OTHERCASE(c);
  }
else
#endif
  {
  c = *cc;
  oc = TABLE_GET(c, common->fcc, c);
  }

SLJIT_ASSERT(c != oc);

diff = c ^ oc;
/* Optimized for English alphabet. */
if (c <= 127 && diff == 0x20)
  return 0x20;

/* Since c != oc, they must have at least 1 bit difference. */
if (!is_powerof2(diff))
  return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
if (common->utf && c > 127)
  {
  /* Start at the last code unit of the sequence and step one unit towards
  the first for every 6 bits the differing bit is shifted down. */
  unit = GET_EXTRALEN(*cc);
  for (; (diff & 0x3f) == 0; diff >>= 6)
    unit--;
  return (unit << 8) | diff;
  }
#endif /* SUPPORT_UNICODE */
return diff;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
if (common->utf && c > 65535)
  {
  /* A difference in the low 10 bits is reported at positions 2 / 3; a
  higher one is shifted down by 10 and reported at positions 0 / 1. */
  if (diff < (1u << 10))
    return (diff < 256) ? ((2 << 8) | diff) : ((3 << 8) | (diff >> 8));
  diff >>= 10;
  }
#endif /* SUPPORT_UNICODE */
return (diff < 256) ? diff : ((1u << 8) | (diff >> 8));

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
3550 
static void check_partial(compiler_common *common, BOOL force)
{
/* Emits the code that records a partial match: in soft partial mode the
hit_start local is set to zero, in hard partial mode control is
transferred to the partial match exit. Nothing is emitted in complete
mode. Unless 'force' is set or empty partial matches are allowed, the
action is skipped when start_used_ptr >= STR_PTR; otherwise, in soft mode,
it is skipped when start_used_ptr is -1. Does not modify registers. */
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;
BOOL soft = (common->mode == PCRE2_JIT_PARTIAL_SOFT);

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

if (force || common->allow_empty_partial)
  {
  if (soft)
    skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  }
else
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

if (soft)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

if (skip != NULL)
  JUMPHERE(skip);
}
3580 
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Emits a subject end check. When STR_PTR >= STR_END a jump is added to
'end_reached'; in the partial modes this happens only if start_used_ptr >=
STR_PTR, otherwise soft mode zeroes hit_start before jumping to
'end_reached' and hard mode goes to the partial match exit.
Does not affect registers. Usually used in a tight spot. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

/* Both partial modes start with the same start_used_ptr test. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(not_at_end);
}
3610 
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
/* Emits a subject end check whose failure path is 'backtracks'. In
complete mode this is a plain STR_PTR >= STR_END test. In the partial
modes, when the end is reached, the code backtracks if no partial match
may be reported (start_used_ptr >= STR_PTR, or with empty partial matches
allowed in soft mode, start_used_ptr == -1); otherwise soft mode zeroes
hit_start and backtracks, while hard mode goes to the partial match exit. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;
BOOL soft = (common->mode == PCRE2_JIT_PARTIAL_SOFT);

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

if (common->allow_empty_partial)
  {
  if (soft)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));
  }
else
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (soft)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(not_at_end);
}
3643 
static void process_partial_match(compiler_common *common)
{
/* Emits the partial match action for a position where the subject end has
been reached. Soft mode zeroes hit_start unless start_used_ptr >= STR_PTR;
hard mode goes to the partial match exit when start_used_ptr < STR_PTR.
Nothing is emitted in any other mode. */
DEFINE_COMPILER;
struct sljit_jump *skip;

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  JUMPHERE(skip);
  return;
  }

if (common->mode != PCRE2_JIT_PARTIAL_HARD)
  return;

/* Jump straight to the label if it already exists, otherwise queue the
jump on the partialmatch list. */
if (common->partialmatchlabel == NULL)
  add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
else
  CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
}
3664 
static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
{
/* Emits a jump to 'label' while STR_PTR is before STR_END; once the end is
reached, execution falls through into the code emitted by
process_partial_match(). */
DEFINE_COMPILER;

CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);
process_partial_match(common);
}
3672 
static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
{
/* Reads the character at STR_PTR into TMP1 and keeps STR_PTR. Does not
check STR_END. TMP2, dst, RETURN_ADDR destroyed.

max:        when it is below the first value that needs decoding or
            validation (128 for UTF-8, 0xd800 for UTF-16 and for invalid
            UTF checking in UTF-32) only the first code unit is loaded.
dst, dstw:  operand used to save STR_PTR while a multi unit character is
            decoded by a helper routine.
backtracks: when not NULL and invalid UTF checking is enabled, invalid
            characters jump to this list; otherwise TMP1 is left as
            INVALID_UTF_CHAR. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (!common->utf || max < 128)
  return;

/* Single byte characters need no further work. */
jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
/* The helper is entered with STR_PTR after the first byte; STR_PTR is
saved before and restored after the call. */
OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
if (common->invalid_utf)
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
else
  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
if (common->invalid_utf && backtracks != NULL)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
JUMPHERE(jump);
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (!common->utf || max < 0xd800)
  return;

OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

if (common->invalid_utf)
  {
  /* Everything outside the surrogate range is a complete character. */
  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  }
else
  {
  /* TMP2 contains the high surrogate. Combine it with the low surrogate
  read from the next code unit. */
  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  }

JUMPHERE(jump);
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (!common->invalid_utf || max < 0xd800)
  return;

/* A character is invalid if it is >= 0x110000 or a surrogate. */
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

if (backtracks != NULL)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
  }
else
  {
  /* No backtrack list: replace the character with INVALID_UTF_CHAR. */
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3756 
static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
{
/* Reads one character back without moving STR_PTR. TMP2 must
contain the start of the subject buffer. Affects TMP1, TMP2, and RETURN_ADDR.

max:        in the 8 and 16 bit libraries, when it is below the first
            value that needs decoding (128 / 0xd800) only the previous
            code unit is loaded.
backtracks: when not NULL and invalid UTF checking is enabled, invalid
            characters jump to this list. When NULL, TMP1 is left as
            INVALID_UTF_CHAR instead; this is now true for the 32 bit
            library as well, which previously passed the list to
            add_jump() without checking it. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  /* Single byte characters need no further work. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  if (common->invalid_utf)
    {
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  if (common->invalid_utf)
    {
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
    /* TMP2 contains the low surrogate. Combine it with the high surrogate
    read from the code unit before it. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  /* A character is invalid if it is >= 0x110000 or a surrogate. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (backtracks != NULL)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    }
  else
    {
    /* No backtrack list: replace the character with INVALID_UTF_CHAR,
    the same way peek_char() does. */
    OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
    CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3824 
3825 #define READ_CHAR_UPDATE_STR_PTR 0x1
3826 #define READ_CHAR_UTF8_NEWLINE 0x2
3827 #define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
3828 #define READ_CHAR_VALID_UTF 0x4
3829 
read_char(compiler_common * common,sljit_u32 min,sljit_u32 max,jump_list ** backtracks,sljit_u32 options)3830 static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
3831   jump_list **backtracks, sljit_u32 options)
3832 {
3833 /* Reads the precise value of a character into TMP1, if the character is
3834 between min and max (c >= min && c <= max). Otherwise it returns with a value
3835 outside the range. Does not check STR_END. */
3836 DEFINE_COMPILER;
3837 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3838 struct sljit_jump *jump;
3839 #endif
3840 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
3841 struct sljit_jump *jump2;
3842 #endif
3843 
3844 SLJIT_UNUSED_ARG(min);
3845 SLJIT_UNUSED_ARG(max);
3846 SLJIT_UNUSED_ARG(backtracks);
3847 SLJIT_UNUSED_ARG(options);
3848 SLJIT_ASSERT(min <= max);
3849 
3850 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3851 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3852 
3853 #ifdef SUPPORT_UNICODE
3854 #if PCRE2_CODE_UNIT_WIDTH == 8
3855 if (common->utf)
3856   {
3857   if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
3858 
3859   if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
3860     {
3861     jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
3862 
3863     if (options & READ_CHAR_UTF8_NEWLINE)
3864       add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
3865     else
3866       add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
3867 
3868     if (backtracks != NULL)
3869       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3870     JUMPHERE(jump);
3871     return;
3872     }
3873 
3874   jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3875   if (min >= 0x10000)
3876     {
3877     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
3878     if (options & READ_CHAR_UPDATE_STR_PTR)
3879       OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3880     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3881     jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
3882     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3883     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3884     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3885     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3886     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3887     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3888     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3889     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
3890     if (!(options & READ_CHAR_UPDATE_STR_PTR))
3891       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
3892     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3893     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3894     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3895     JUMPHERE(jump2);
3896     if (options & READ_CHAR_UPDATE_STR_PTR)
3897       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3898     }
3899   else if (min >= 0x800 && max <= 0xffff)
3900     {
3901     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
3902     if (options & READ_CHAR_UPDATE_STR_PTR)
3903       OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3904     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3905     jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
3906     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3907     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3908     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3909     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3910     if (!(options & READ_CHAR_UPDATE_STR_PTR))
3911       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3912     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3913     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3914     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3915     JUMPHERE(jump2);
3916     if (options & READ_CHAR_UPDATE_STR_PTR)
3917       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3918     }
3919   else if (max >= 0x800)
3920     {
3921     add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
3922     }
3923   else if (max < 128)
3924     {
3925     OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3926     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3927     }
3928   else
3929     {
3930     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3931     if (!(options & READ_CHAR_UPDATE_STR_PTR))
3932       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3933     else
3934       OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3935     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3936     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3937     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3938     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3939     if (options & READ_CHAR_UPDATE_STR_PTR)
3940       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3941     }
3942   JUMPHERE(jump);
3943   }
3944 #elif PCRE2_CODE_UNIT_WIDTH == 16
3945 if (common->utf)
3946   {
3947   if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
3948 
3949   if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
3950     {
3951     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3952     jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
3953 
3954     if (options & READ_CHAR_UTF8_NEWLINE)
3955       add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
3956     else
3957       add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
3958 
3959     if (backtracks != NULL)
3960       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3961     JUMPHERE(jump);
3962     return;
3963     }
3964 
3965   if (max >= 0x10000)
3966     {
3967     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3968     jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
3969     /* TMP2 contains the high surrogate. */
3970     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3971     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
3972     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3973     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
3974     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3975     JUMPHERE(jump);
3976     return;
3977     }
3978 
3979   /* Skip low surrogate if necessary. */
3980   OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3981 
3982   if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
3983     {
3984     if (options & READ_CHAR_UPDATE_STR_PTR)
3985       OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3986     OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
3987     if (options & READ_CHAR_UPDATE_STR_PTR)
3988       CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
3989     if (max >= 0xd800)
3990       CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000);
3991     }
3992   else
3993     {
3994     jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
3995     if (options & READ_CHAR_UPDATE_STR_PTR)
3996       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3997     if (max >= 0xd800)
3998       OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
3999     JUMPHERE(jump);
4000     }
4001   }
4002 #elif PCRE2_CODE_UNIT_WIDTH == 32
4003 if (common->invalid_utf)
4004   {
4005   if (backtracks != NULL)
4006     {
4007     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4008     add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
4009     add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
4010     }
4011   else
4012     {
4013     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4014     OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
4015     CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4016     OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4017     CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4018     }
4019   }
4020 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
4021 #endif /* SUPPORT_UNICODE */
4022 }
4023 
4024 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4025 
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Tells whether looking at character codes below 128 is sufficient to decide
a match. This is the case when the second half of the 32-byte bitmap (bytes
16..31, i.e. the bits of codes 128..255) is uniform: every byte is 0xff when
nclass is set, or every byte is zero when it is not. */
const sljit_u8 expected = nclass ? 0xff : 0;
int idx;

for (idx = 16; idx < 32; idx++)
  {
  if (bitset[idx] != expected)
    return FALSE;
  }
return TRUE;
}
4042 
static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the precise character type of a character into TMP1, if the character
is less than 128. Otherwise it returns with zero. Does not check STR_END.

When negated is TRUE, the remaining code units of a character >= 0x80 are
consumed as well, so STR_PTR ends up after the whole character. In invalid UTF
mode the character is decoded by the utfreadchar_invalid helper and a jump to
backtracks is added for the INVALID_UTF_CHAR result. TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_jump *jump;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* All values > 127 are zero in ctypes. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (negated)
  {
  /* Single byte characters need no further work. */
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (common->invalid_utf)
    {
    /* The helper expects the first byte of the character in TMP1, but TMP1
    holds the ctypes entry at this point, so the byte is copied from TMP2. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    /* A valid multi-byte character has no type bits. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    }
  else
    {
    /* Valid UTF: skip the additional bytes using the length table. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  JUMPHERE(jump);
  }
}
4077 
4078 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4079 
static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END
for the first code unit. Characters above 255 have no entry in the ctypes
table and yield zero. When invalid UTF checking is enabled, jumps for
malformed input are added to backtracks. TMP2 is destroyed. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(negated);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The result of this read may be unused, but saves an "else" part. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (!negated)
    {
    /* Only the second byte is read here: the two bytes are combined into
    TMP2, and anything above 255 gets type zero below. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));

    /* TMP2 becomes ((first - 0xc2) << 6) + second. A second byte in the
    0x80-0xbf range also adds the missing (0x2 << 6). */
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
    /* The continuation byte (now TMP1) must have been in the 0x80-0xbf range.
    TMP2 holds the combined value and must not be tested here. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else if (common->invalid_utf)
    {
    /* The helper expects the first byte of the character in TMP1, but TMP1
    holds the ctypes entry at this point, so the byte is copied from TMP2. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && negated)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && negated)
  {
  /* Skip low surrogate if necessary. */
  if (!common->invalid_utf)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);

    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
      {
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
      CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
      }
    else
      {
      jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      JUMPHERE(jump);
      }
    return;
    }

  /* Invalid UTF mode: a code unit outside 0xd800-0xdfff is complete. A high
  surrogate must be followed by a low surrogate that lies inside the subject;
  every other combination backtracks. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
4198 
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
{
/* Steps STR_PTR back by one character; TMP1 is clobbered. When must_be_valid
is TRUE, TMP2 is left alone. Otherwise TMP2 has to hold the start of the
subject buffer on entry and its value is lost. STR_PTR is not modified for
invalid character sequences. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *single_unit;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *prev_unit;
#endif

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(must_be_valid);

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (!must_be_valid && common->invalid_utf)
    {
    /* A byte below 0x80 is a whole character; everything else is handed to
    the utfmoveback_invalid helper, and a zero in TMP1 afterwards takes the
    backtracks exit when one was supplied. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(single_unit);
    return;
    }

  /* Trusted input: keep stepping back while the byte is 10xxxxxx. */
  prev_unit = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, prev_unit);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (!must_be_valid && common->invalid_utf)
    {
    /* Code units outside 0xd800-0xdfff are whole characters; surrogates are
    passed to the utfmoveback_invalid helper. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    single_unit = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(single_unit);
    return;
    }

  /* Trusted input: step back one more code unit when the unit just passed
  was a low surrogate (0xdc00-0xdfff). */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && !must_be_valid)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  if (backtracks != NULL)
    {
    /* Values of 0x110000 and above take the backtracks exit before STR_PTR
    is changed. */
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    {
    /* Without a backtrack list, STR_PTR moves back by one code unit only
    when the value is below 0x110000. */
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
    OP2(SLJIT_SHL,  TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
    }
  return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
/* Every character is exactly one code unit long here. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4284 
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Tests whether the character held in TMP1 is a newline of the kind given by
nltype. A jump is added to backtracks that is taken when it is a newline
(jumpifmatch is TRUE) or when it is not (jumpifmatch is FALSE). TMP2 may be
destroyed. */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The anynewline helper is called and the zero flag is tested after it. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  if (jumpifmatch)
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  else
    add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  /* A single, fixed newline character. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  if (jumpifmatch)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: either CR or NL counts as a newline. */
if (jumpifmatch)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  return;
  }

/* CR skips the second test; anything that is not NL either goes to backtracks. */
is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
JUMPHERE(is_cr);
}
4317 
4318 #ifdef SUPPORT_UNICODE
4319 
4320 #if PCRE2_CODE_UNIT_WIDTH == 8
static void do_utfreadchar(compiler_common *common)
{
/* Fast decoder for a multi-byte UTF-8 character. On entry TMP1 holds the
first byte of the character (>= 0xc0) and STR_PTR addresses the byte after
it. The character value is returned in TMP1 and STR_PTR is moved past the
bytes that were read. No validity or end-of-subject checks are made. */
DEFINE_COMPILER;
struct sljit_jump *longer;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Append the low six bits of the second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Bit 5 of the first byte now sits at 0x800; when it is clear the sequence
is two bytes long. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
longer = JUMP(SLJIT_NOT_ZERO);

/* Two bytes: clear the shifted 0xc0 marker bits. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(longer);

/* Append the low six bits of the third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Bit 4 of the first byte now sits at 0x10000; when it is clear the sequence
is three bytes long. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
longer = JUMP(SLJIT_NOT_ZERO);

/* Three bytes: clear the shifted 0xe0 marker bits. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four bytes: clear the shifted 0xf0 marker bits, then append the low six
bits of the fourth byte. */
JUMPHERE(longer);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4365 
static void do_utfreadtype8(compiler_common *common)
{
/* Fast lookup of the character type of a multi-byte UTF-8 character. On entry
TMP2 holds the first byte of the character (>= 0xc0). The type is returned in
TMP1 and STR_PTR is moved past the character. TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_jump *three_or_more;
struct sljit_jump *above_255;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Bit 5 of the first byte is set for sequences longer than two bytes. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
three_or_more = JUMP(SLJIT_NOT_ZERO);

/* Two bytes: read the second byte and keep the payload bits of the first. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper five bits are known now; more than 3 means a value above 255. */
above_255 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two bytes, but no ctypes entry: the type is zero. */
JUMPHERE(above_255);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Types exist only for characters below 256, so a longer sequence is just
skipped using the length table and reported with type zero. */
JUMPHERE(three_or_more);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4401 
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Checked (slow) decoder for a multi-byte UTF-8 character. On entry TMP1
holds the first byte of the character (>= 0xc0) and STR_PTR addresses the byte
after it. The character value is returned in TMP1, or INVALID_UTF_CHAR when
the sequence is malformed or is cut off by STR_END. STR_PTR is undefined for
invalid characters. TMP2 is destroyed. */
DEFINE_COMPILER;
sljit_s32 idx;
sljit_s32 use_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *longer;
struct sljit_jump *near_end;
struct sljit_label *three_byte_tail;
struct sljit_label *invalid_exit;
struct sljit_jump *invalid[11];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);

/* Optimistically assume at least three more code units are available; the
loads below therefore use negative offsets. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));

/* First byte outside 0xc2-0xf4: not a valid lead byte, nothing more is read. */
invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);

near_end = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Second byte. If it is in the 0x80-0xbf range, adding it also restores the
(0x2 << 6) that the 0xc2 subtraction removed. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
longer = JUMP(SLJIT_NOT_ZERO);

/* Two-byte character: give back the two code units that were not used. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(longer);

/* Third byte. With conditional moves a bad continuation byte replaces TMP1
by 0x20000, which the range checks of the three-byte tail reject. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (use_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000);
  invalid[2] = NULL;
  }
else
  invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
longer = JUMP(SLJIT_NOT_ZERO);

/* Shared tail of both three-byte paths. */
three_byte_tail = LABEL();

/* Reject the surrogate range 0xd800-0xdfff. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
if (use_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800);
  invalid[3] = NULL;
  }
else
  invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Reject values below 0x800, which three bytes must not encode. */
if (use_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  invalid[4] = NULL;
  }
else
  invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(longer);

/* Fourth byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (use_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0);
  invalid[5] = NULL;
  }
else
  invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* Only 0x10000-0x10ffff may be encoded by four bytes. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
if (use_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  invalid[6] = NULL;
  }
else
  invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than three code units remain in the subject. */
JUMPHERE(near_end);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Second byte. If it is in the 0x80-0xbf range, adding it also restores the
(0x2 << 6) that the 0xc2 subtraction removed. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
longer = JUMP(SLJIT_NOT_ZERO);

/* Two-byte character; STR_PTR is already behind it. */
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Third byte, which must still be inside the subject. */
JUMPHERE(longer);
invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (use_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  invalid[10] = NULL;
  }
else
  invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* The shared three-byte tail subtracts one from STR_PTR again. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* A four-byte sequence cannot fit here, so only three-byte values continue. */
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_tail);

/* Common exit for every malformed sequence. Slots that were handled by a
conditional move hold NULL. */
invalid_exit = LABEL();
for (idx = 0; idx < 11; idx++)
  sljit_set_label(invalid[idx], invalid_exit);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4560 
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Checked (slow) UTF-8 decoder specialized for newline detection. On entry
TMP1 holds the first byte of the character (>= 0xc0). TMP1 returns 0x85,
0x2028 or 0x2029 when that newline was read and all newlines are accepted;
in every other case it returns INVALID_UTF_CHAR after the following
continuation bytes have been skipped. TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_label *skip_loop;
struct sljit_label *skip_entry;
struct sljit_label *give_up;
struct sljit_jump *at_end;
struct sljit_jump *lead_c2;
struct sljit_jump *lead_e2;
struct sljit_jump *not_continuation;
struct sljit_jump *loop_at_end;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

if (common->nltype != NLTYPE_ANY)
  {
  SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);

  /* Every possible newline is ASCII, so a multi-byte character can never be
  one: just step over its continuation bytes.
  NOTE(review): STR_END is tested only before the loop is entered, not on
  each iteration - confirm the subject cannot end inside a run of
  continuation bytes here. */
  at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  skip_loop = LABEL();
  /* Use a post-increment load when the target supports it. */
  if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
    sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  else
    {
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }

  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_loop);
  /* Step back over the byte that ended the run. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  JUMPHERE(at_end);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  return;
  }

/* Read the second byte and branch on the two lead bytes that can start a
multi-byte newline. */
at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

lead_c2 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
lead_e2 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);

/* Not a newline: TMP2 holds the byte read last. */
skip_entry = LABEL();
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
not_continuation = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);

/* Step over further continuation bytes, stopping at the subject end. */
skip_loop = LABEL();
loop_at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_loop);

/* Step back over the byte that ended the run. */
JUMPHERE(not_continuation);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

give_up = LABEL();
JUMPHERE(at_end);
JUMPHERE(loop_at_end);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Lead byte 0xc2: only 0xc2 0x85 (U+0085) is a newline. */
JUMPHERE(lead_c2);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_entry);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Lead byte 0xe2: U+2028 and U+2029 need 0x80 as the second byte and one
more continuation byte. */
JUMPHERE(lead_e2);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_entry);
CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, give_up);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_entry);

/* Return 0x2000 combined with the low six bits of the third byte. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4651 
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Goes one character back (8-bit library, input may be invalid UTF-8).

Register usage, as far as this routine itself shows:
  TMP1 (in)  - tested against 0xc0 first; any value >= 0xc0 is rejected.
               Presumably the code unit the caller has just stepped over
               (NOTE(review): confirm against the call site).
  TMP2 (in)  - lower bound for STR_PTR; nothing below it is read.
  TMP1 (out) - 1 when a well-formed lead byte was found, 0 otherwise.
  STR_PTR    - on success points at the lead byte (entry value minus 1, 2
               or 3 code units); on failure it is the entry value plus one
               code unit.

All comparisons use the unsigned SLJIT conditions, so "LESS than -0x40"
after subtracting 0xc0 means "was not a continuation byte (0x80-0xbf)". */
DEFINE_COMPILER;
sljit_s32 i;
struct sljit_jump *jump;
struct sljit_jump *buffer_start_close;
struct sljit_label *exit_ok_label;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[7];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Speculatively step back by three so that all candidate lead bytes are
reachable with non-negative offsets 2, 1 and 0. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);

/* Two-byte sequence. */
/* Fewer than three code units are available before the entry position:
use the bounds-checked slow path below. */
buffer_start_close = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);

/* Lead byte 0xc0-0xdf found one unit back. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(jump);
/* The byte just examined must be a continuation byte. */
exit_invalid[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);

/* Lead byte 0xe0-0xef found two units back. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four-byte sequence. */
JUMPHERE(jump);
/* Rebase TMP1 from "byte - 0xe0" to "byte - 0x80" for the continuation
byte test. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
/* Only lead bytes 0xf0-0xf4 are accepted. */
exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);

/* Shared success exit: STR_PTR already points at the lead byte. */
exit_ok_label = LABEL();
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two-byte sequence. */
/* Slow path: close to the lower bound, so every step is range checked. */
JUMPHERE(buffer_start_close);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

exit_invalid[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, exit_ok_label);

/* Three-byte sequence. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
exit_invalid[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
exit_invalid[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, exit_ok_label);

/* Four-byte sequences are not possible. */

/* Failure exit for the slow path: STR_PTR is two units below the entry
value here, so adding three yields entry + 1. */
exit_invalid_label = LABEL();
sljit_set_label(exit_invalid[5], exit_invalid_label);
sljit_set_label(exit_invalid[6], exit_invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(exit_invalid[4]);
/* -2 + 4 = 2 */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Failure exit for the fast path: STR_PTR is three units below the entry
value here, so adding four yields entry + 1. */
exit_invalid_label = LABEL();
for (i = 0; i < 4; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4747 
static void do_utfpeakcharback(compiler_common *common)
{
/* Peek a character back. Does not modify STR_PTR.

Decodes the multi-byte UTF-8 character that ends just before STR_PTR and
leaves its value in TMP1; TMP2 is used as scratch. No validity or bounds
checks are made here: the bytes at offsets -2, -3 and -4 are read as
needed, so the subject is presumably known to be valid UTF-8 and the
single-byte case already handled (NOTE(review): confirm against caller). */
DEFINE_COMPILER;
struct sljit_jump *jump[2];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Is the byte at -2 a two-byte lead (0xc0-0xdf)? */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);

/* Is the byte at -3 a three-byte lead (0xe0-0xef)? */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
jump[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);

/* Four-byte sequence: TMP1 becomes the payload of the byte at -3 (rebased
from "- 0xe0" to "- 0x80"), combined with the lead byte's payload. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the six payload bits of the byte at -2. */
JUMPHERE(jump[1]);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the six payload bits of the byte at -1. */
JUMPHERE(jump[0]);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4784 
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Peek a character back. Does not modify STR_PTR.

8-bit variant for subjects that may contain invalid UTF-8.

Register usage, as far as this routine itself shows:
  TMP1 (in)  - rejected immediately when >= 0xc0; otherwise it is used as
               the final byte of the sequence (presumably the byte at
               STR_PTR - 1, NOTE(review): confirm against caller).
  TMP2 (in)  - lower bound of readable data; compared with STR_PTR to
               decide how many preceding bytes may be read.
  TMP1 (out) - decoded character, or INVALID_UTF_CHAR.

Three copies of the decoder exist: the main one when at least four code
units precede STR_PTR, and two shorter ones (after the jump[0] targets)
for when only three or two are available. The shorter ones re-enter the
main path at two_byte_entry / three_byte_entry. */
DEFINE_COMPILER;
sljit_s32 i;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *jump[2];
struct sljit_label *two_byte_entry;
struct sljit_label *three_byte_entry;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[8];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
/* Lower bound + 3 >= STR_PTR: fewer than four units available. */
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
/* Lead bytes 0xc2-0xdf only: 0xc0 and 0xc1 are not accepted. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);

two_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
/* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump[1]);
/* Not a two-byte lead: the byte at -2 must then be a continuation byte.
Rebase it to "byte - 0x80" and strip the marker bit from TMP1 as well. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);

three_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Reject the surrogate range 0xd800-0xdfff. With CMOV the value is forced
to -0xd800 so that, after 0xd800 is added back, it is 0 and therefore
fails the "< 0x800" test that follows. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800);
  exit_invalid[2] = NULL;
  }
else
  exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);

/* Reject values below 0x800, which do not need three bytes. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  exit_invalid[3] = NULL;
  }
else
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump[1]);
/* Not a three-byte lead: the byte at -3 must be a continuation byte. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
/* ADD is used instead of OR because of the SUB 0x10000 above. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

/* TMP1 currently holds "value - 0x10000"; a single unsigned comparison
with 0x100000 rejects everything outside 0x10000-0x10ffff. */
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  exit_invalid[5] = NULL;
  }
else
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than four units available. TMP2 is lowered to "bound + 2" to test
whether three units can be read. */
JUMPHERE(jump[0]);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);

/* A four-byte sequence cannot fit in the available data. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than three units available: only a two-byte sequence can fit,
and only if TMP2 (now "bound + 2") does not exceed STR_PTR. */
JUMPHERE(jump[0]);
exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

/* Common failure exit. Entries 2, 3 and 5 are NULL when CMOV is used. */
exit_invalid_label = LABEL();
for (i = 0; i < 8; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4916 
4917 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
4918 
4919 #if PCRE2_CODE_UNIT_WIDTH == 16
4920 
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Slow decoding a UTF-16 character. TMP1 contains the first half
of the character (>= 0xd800). Return char value in TMP1. STR_PTR is
undefined for invalid characters.

TMP2 is shifted left by 10 and combined with the low surrogate's payload
below, so on entry it presumably holds the first code unit minus 0xd800
(NOTE(review): confirm against the caller). TMP1 is set to
INVALID_UTF_CHAR when the pair is malformed or the subject ends. */
DEFINE_COMPILER;
struct sljit_jump *exit_invalid[3];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* TMP2 contains the high surrogate. */
/* A first unit of 0xdc00 or above cannot start a surrogate pair. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
/* The second unit must lie inside the subject. */
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
/* The second unit must be a low surrogate (0xdc00-0xdfff). */
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
JUMPHERE(exit_invalid[2]);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4952 
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Slow decoding a UTF-16 character, specialized for newlines.
TMP1 contains the first half of the character (>= 0xd800). Return
char value in TMP1.

No real decoding is done. A well-formed pair yields the constant 0x10000
in TMP1 and STR_PTR is advanced past the low surrogate; a missing low
surrogate also yields 0x10000 but leaves STR_PTR unchanged. TMP1 becomes
INVALID_UTF_CHAR when the subject ends or the first unit is >= 0xdc00.
TMP2 is overwritten and used as scratch. */

DEFINE_COMPILER;
struct sljit_jump *exit_invalid[2];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* TMP2 receives the code unit that follows the first half. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);

/* TMP2 = 1 if the following unit is a low surrogate, otherwise 0; it is
then scaled to a byte count and added to STR_PTR. */
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4984 
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Goes one character back (16-bit library, input may be invalid UTF-16).

Register usage, as far as this routine itself shows:
  TMP1 (in)  - rejected when < 0x400; presumably the code unit just
               stepped over, minus 0xd800, so that values below 0x400 are
               high surrogates (NOTE(review): confirm against caller).
  TMP2 (in)  - lower bound; the unit at STR_PTR - 1 is read only when
               TMP2 < STR_PTR.
  TMP1 (out) - 1 if the preceding unit is a high surrogate, else 0.
  STR_PTR    - decreased by one unit on success, increased by one unit on
               failure. */
DEFINE_COMPILER;
struct sljit_jump *exit_invalid[3];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* The preceding unit must be a high surrogate (0xd800-0xdbff). */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
JUMPHERE(exit_invalid[2]);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5012 
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Peek a character back. Does not modify STR_PTR.

16-bit variant for subjects that may contain invalid UTF-16.

Register usage, as far as this routine itself shows:
  TMP1 (in)  - the last code unit of the character; presumably the unit
               at STR_PTR - 1 and already known to be >= 0xd800
               (NOTE(review): confirm against caller).
  TMP2 (in)  - lower bound of readable data; clobbered.
  TMP1 (out) - unchanged when >= 0xe000, the combined surrogate pair when
               well formed, INVALID_UTF_CHAR otherwise. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *exit_invalid[3];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Values from 0xe000 upwards are not surrogates: return them as they are. */
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
/* The last unit must be a low surrogate ... */
exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
/* ... and two units must be available before STR_PTR. */
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
/* The unit at -2 must be a high surrogate (0xd800-0xdbff). */
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

JUMPHERE(jump);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
JUMPHERE(exit_invalid[2]);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5044 
5045 #endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
5046 
5047 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
5048 #define UCD_BLOCK_MASK 127
5049 #define UCD_BLOCK_SHIFT 7
5050 
static void do_getucd(compiler_common *common)
{
/* Search the UCD record for the character comes in TMP1.
Returns the UCD record index in TMP2 (the 16-bit value read from the
stage 2 table). TMP1 is overwritten with the stage 2 table index and does
not hold a meaningful result on return.

In the 32-bit library without UTF, values above MAX_UTF_CODE_POINT are
first replaced by UNASSIGNED_UTF_CHAR so that the table lookups stay in
range. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *jump;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(jump);
  }
#endif

/* Stage 1: TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT] (16-bit entries,
hence the extra shift by one to form a byte offset). */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Stage 2: TMP1 = block * 128 + (char & 127), then
TMP2 = ucd_stage2[TMP1] (the index register is scaled by 2). */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5090 
static void do_getucdtype(compiler_common *common)
{
/* Search the UCD record for the character comes in TMP1.
Returns chartype in TMP1. TMP2 is left holding the UCD record index
multiplied by 4 (an intermediate of the address computation), not the
plain record index.

In the 32-bit library without UTF, values above MAX_UTF_CODE_POINT are
first replaced by UNASSIGNED_UTF_CHAR so that the table lookups stay in
range. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *jump;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(jump);
  }
#endif

/* Stage 1: TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT] (16-bit entries,
hence the extra shift by one to form a byte offset). */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Stage 2: TMP1 = block * 128 + (char & 127), then
TMP2 = ucd_stage2[TMP1] (the index register is scaled by 2). */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

/* TMP2 is multiplied by 12. Same as (TMP2 << 2) + ((TMP2 << 2) << 1). */
/* The first term is folded into TMP1 (base address of the chartype field
plus index * 4); the second comes from the scaled index in the final
load. 12 is sizeof(ucd_record), as asserted above. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5137 
5138 #endif /* SUPPORT_UNICODE */
5139 
mainloop_entry(compiler_common * common)5140 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
5141 {
5142 DEFINE_COMPILER;
5143 struct sljit_label *mainloop;
5144 struct sljit_label *newlinelabel = NULL;
5145 struct sljit_jump *start;
5146 struct sljit_jump *end = NULL;
5147 struct sljit_jump *end2 = NULL;
5148 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5149 struct sljit_label *loop;
5150 struct sljit_jump *jump;
5151 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5152 jump_list *newline = NULL;
5153 sljit_u32 overall_options = common->re->overall_options;
5154 BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
5155 BOOL newlinecheck = FALSE;
5156 BOOL readuchar = FALSE;
5157 
5158 if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
5159     && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
5160   newlinecheck = TRUE;
5161 
5162 SLJIT_ASSERT(common->abort_label == NULL);
5163 
5164 if ((overall_options & PCRE2_FIRSTLINE) != 0)
5165   {
5166   /* Search for the end of the first line. */
5167   SLJIT_ASSERT(common->match_end_ptr != 0);
5168   OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5169 
5170   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5171     {
5172     mainloop = LABEL();
5173     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5174     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5175     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5176     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5177     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
5178     CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
5179     JUMPHERE(end);
5180     OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5181     }
5182   else
5183     {
5184     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5185     mainloop = LABEL();
5186     /* Continual stores does not cause data dependency. */
5187     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5188     read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
5189     check_newlinechar(common, common->nltype, &newline, TRUE);
5190     CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
5191     JUMPHERE(end);
5192     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5193     set_jumps(newline, LABEL());
5194     }
5195 
5196   OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5197   }
5198 else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
5199   {
5200   /* Check whether offset limit is set and valid. */
5201   SLJIT_ASSERT(common->match_end_ptr != 0);
5202 
5203   if (HAS_VIRTUAL_REGISTERS)
5204     {
5205     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5206     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5207     }
5208   else
5209     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5210 
5211   OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5212   end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
5213   if (HAS_VIRTUAL_REGISTERS)
5214     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5215   else
5216     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
5217 
5218 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5219   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5220 #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5221   if (HAS_VIRTUAL_REGISTERS)
5222     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5223 
5224   OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
5225   end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5226   OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5227   JUMPHERE(end2);
5228   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
5229   add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
5230   JUMPHERE(end);
5231   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
5232   }
5233 
5234 start = JUMP(SLJIT_JUMP);
5235 
5236 if (newlinecheck)
5237   {
5238   newlinelabel = LABEL();
5239   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5240   end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5241   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5242   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
5243   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
5244 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5245   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5246 #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5247   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5248   end2 = JUMP(SLJIT_JUMP);
5249   }
5250 
5251 mainloop = LABEL();
5252 
5253 /* Increasing the STR_PTR here requires one less jump in the most common case. */
5254 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5255 if (common->utf && !common->invalid_utf) readuchar = TRUE;
5256 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5257 if (newlinecheck) readuchar = TRUE;
5258 
5259 if (readuchar)
5260   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5261 
5262 if (newlinecheck)
5263   CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
5264 
5265 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5266 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5267 #if PCRE2_CODE_UNIT_WIDTH == 8
5268 if (common->invalid_utf)
5269   {
5270   /* Skip continuation code units. */
5271   loop = LABEL();
5272   jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5273   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5274   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5275   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5276   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
5277   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5278   JUMPHERE(jump);
5279   }
5280 else if (common->utf)
5281   {
5282   jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5283   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5284   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5285   JUMPHERE(jump);
5286   }
5287 #elif PCRE2_CODE_UNIT_WIDTH == 16
5288 if (common->invalid_utf)
5289   {
5290   /* Skip continuation code units. */
5291   loop = LABEL();
5292   jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5293   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5294   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5295   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
5296   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
5297   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5298   JUMPHERE(jump);
5299   }
5300 else if (common->utf)
5301   {
5302   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5303 
5304   if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
5305     {
5306     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5307     OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
5308     CMOV(SLJIT_LESS, STR_PTR, TMP2, 0);
5309     }
5310   else
5311     {
5312     OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
5313     OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
5314     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5315     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5316     }
5317   }
5318 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
5319 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5320 JUMPHERE(start);
5321 
5322 if (newlinecheck)
5323   {
5324   JUMPHERE(end);
5325   JUMPHERE(end2);
5326   }
5327 
5328 return mainloop;
5329 }
5330 
5331 
add_prefix_char(PCRE2_UCHAR chr,fast_forward_char_data * chars,BOOL last)5332 static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
5333 {
5334 sljit_u32 i, count = chars->count;
5335 
5336 if (count == 255)
5337   return;
5338 
5339 if (count == 0)
5340   {
5341   chars->count = 1;
5342   chars->chars[0] = chr;
5343 
5344   if (last)
5345     chars->last_count = 1;
5346   return;
5347   }
5348 
5349 for (i = 0; i < count; i++)
5350   if (chars->chars[i] == chr)
5351     return;
5352 
5353 if (count >= MAX_DIFF_CHARS)
5354   {
5355   chars->count = 255;
5356   return;
5357   }
5358 
5359 chars->chars[count] = chr;
5360 chars->count = count + 1;
5361 
5362 if (last)
5363   chars->last_count++;
5364 }
5365 
scan_prefix(compiler_common * common,PCRE2_SPTR cc,fast_forward_char_data * chars,int max_chars,sljit_u32 * rec_count)5366 static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
5367 {
5368 /* Recursive function, which scans prefix literals. */
5369 BOOL last, any, class, caseless;
5370 int len, repeat, len_save, consumed = 0;
5371 sljit_u32 chr; /* Any unicode character. */
5372 sljit_u8 *bytes, *bytes_end, byte;
5373 PCRE2_SPTR alternative, cc_save, oc;
5374 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5375 PCRE2_UCHAR othercase[4];
5376 #elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
5377 PCRE2_UCHAR othercase[2];
5378 #else
5379 PCRE2_UCHAR othercase[1];
5380 #endif
5381 
5382 repeat = 1;
5383 while (TRUE)
5384   {
5385   if (*rec_count == 0)
5386     return 0;
5387   (*rec_count)--;
5388 
5389   last = TRUE;
5390   any = FALSE;
5391   class = FALSE;
5392   caseless = FALSE;
5393 
5394   switch (*cc)
5395     {
5396     case OP_CHARI:
5397     caseless = TRUE;
5398     /* Fall through */
5399     case OP_CHAR:
5400     last = FALSE;
5401     cc++;
5402     break;
5403 
5404     case OP_SOD:
5405     case OP_SOM:
5406     case OP_SET_SOM:
5407     case OP_NOT_WORD_BOUNDARY:
5408     case OP_WORD_BOUNDARY:
5409     case OP_EODN:
5410     case OP_EOD:
5411     case OP_CIRC:
5412     case OP_CIRCM:
5413     case OP_DOLL:
5414     case OP_DOLLM:
5415     /* Zero width assertions. */
5416     cc++;
5417     continue;
5418 
5419     case OP_ASSERT:
5420     case OP_ASSERT_NOT:
5421     case OP_ASSERTBACK:
5422     case OP_ASSERTBACK_NOT:
5423     case OP_ASSERT_NA:
5424     case OP_ASSERTBACK_NA:
5425     cc = bracketend(cc);
5426     continue;
5427 
5428     case OP_PLUSI:
5429     case OP_MINPLUSI:
5430     case OP_POSPLUSI:
5431     caseless = TRUE;
5432     /* Fall through */
5433     case OP_PLUS:
5434     case OP_MINPLUS:
5435     case OP_POSPLUS:
5436     cc++;
5437     break;
5438 
5439     case OP_EXACTI:
5440     caseless = TRUE;
5441     /* Fall through */
5442     case OP_EXACT:
5443     repeat = GET2(cc, 1);
5444     last = FALSE;
5445     cc += 1 + IMM2_SIZE;
5446     break;
5447 
5448     case OP_QUERYI:
5449     case OP_MINQUERYI:
5450     case OP_POSQUERYI:
5451     caseless = TRUE;
5452     /* Fall through */
5453     case OP_QUERY:
5454     case OP_MINQUERY:
5455     case OP_POSQUERY:
5456     len = 1;
5457     cc++;
5458 #ifdef SUPPORT_UNICODE
5459     if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
5460 #endif
5461     max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
5462     if (max_chars == 0)
5463       return consumed;
5464     last = FALSE;
5465     break;
5466 
5467     case OP_KET:
5468     cc += 1 + LINK_SIZE;
5469     continue;
5470 
5471     case OP_ALT:
5472     cc += GET(cc, 1);
5473     continue;
5474 
5475     case OP_ONCE:
5476     case OP_BRA:
5477     case OP_BRAPOS:
5478     case OP_CBRA:
5479     case OP_CBRAPOS:
5480     alternative = cc + GET(cc, 1);
5481     while (*alternative == OP_ALT)
5482       {
5483       max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
5484       if (max_chars == 0)
5485         return consumed;
5486       alternative += GET(alternative, 1);
5487       }
5488 
5489     if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
5490       cc += IMM2_SIZE;
5491     cc += 1 + LINK_SIZE;
5492     continue;
5493 
5494     case OP_CLASS:
5495 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5496     if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
5497       return consumed;
5498 #endif
5499     class = TRUE;
5500     break;
5501 
5502     case OP_NCLASS:
5503 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5504     if (common->utf) return consumed;
5505 #endif
5506     class = TRUE;
5507     break;
5508 
5509 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
5510     case OP_XCLASS:
5511 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5512     if (common->utf) return consumed;
5513 #endif
5514     any = TRUE;
5515     cc += GET(cc, 1);
5516     break;
5517 #endif
5518 
5519     case OP_DIGIT:
5520 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5521     if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
5522       return consumed;
5523 #endif
5524     any = TRUE;
5525     cc++;
5526     break;
5527 
5528     case OP_WHITESPACE:
5529 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5530     if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
5531       return consumed;
5532 #endif
5533     any = TRUE;
5534     cc++;
5535     break;
5536 
5537     case OP_WORDCHAR:
5538 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5539     if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
5540       return consumed;
5541 #endif
5542     any = TRUE;
5543     cc++;
5544     break;
5545 
5546     case OP_NOT:
5547     case OP_NOTI:
5548     cc++;
5549     /* Fall through. */
5550     case OP_NOT_DIGIT:
5551     case OP_NOT_WHITESPACE:
5552     case OP_NOT_WORDCHAR:
5553     case OP_ANY:
5554     case OP_ALLANY:
5555 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5556     if (common->utf) return consumed;
5557 #endif
5558     any = TRUE;
5559     cc++;
5560     break;
5561 
5562 #ifdef SUPPORT_UNICODE
5563     case OP_NOTPROP:
5564     case OP_PROP:
5565 #if PCRE2_CODE_UNIT_WIDTH != 32
5566     if (common->utf) return consumed;
5567 #endif
5568     any = TRUE;
5569     cc += 1 + 2;
5570     break;
5571 #endif
5572 
5573     case OP_TYPEEXACT:
5574     repeat = GET2(cc, 1);
5575     cc += 1 + IMM2_SIZE;
5576     continue;
5577 
5578     case OP_NOTEXACT:
5579     case OP_NOTEXACTI:
5580 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5581     if (common->utf) return consumed;
5582 #endif
5583     any = TRUE;
5584     repeat = GET2(cc, 1);
5585     cc += 1 + IMM2_SIZE + 1;
5586     break;
5587 
5588     default:
5589     return consumed;
5590     }
5591 
5592   if (any)
5593     {
5594     do
5595       {
5596       chars->count = 255;
5597 
5598       consumed++;
5599       if (--max_chars == 0)
5600         return consumed;
5601       chars++;
5602       }
5603     while (--repeat > 0);
5604 
5605     repeat = 1;
5606     continue;
5607     }
5608 
5609   if (class)
5610     {
5611     bytes = (sljit_u8*) (cc + 1);
5612     cc += 1 + 32 / sizeof(PCRE2_UCHAR);
5613 
5614     switch (*cc)
5615       {
5616       case OP_CRSTAR:
5617       case OP_CRMINSTAR:
5618       case OP_CRPOSSTAR:
5619       case OP_CRQUERY:
5620       case OP_CRMINQUERY:
5621       case OP_CRPOSQUERY:
5622       max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
5623       if (max_chars == 0)
5624         return consumed;
5625       break;
5626 
5627       default:
5628       case OP_CRPLUS:
5629       case OP_CRMINPLUS:
5630       case OP_CRPOSPLUS:
5631       break;
5632 
5633       case OP_CRRANGE:
5634       case OP_CRMINRANGE:
5635       case OP_CRPOSRANGE:
5636       repeat = GET2(cc, 1);
5637       if (repeat <= 0)
5638         return consumed;
5639       break;
5640       }
5641 
5642     do
5643       {
5644       if (bytes[31] & 0x80)
5645         chars->count = 255;
5646       else if (chars->count != 255)
5647         {
5648         bytes_end = bytes + 32;
5649         chr = 0;
5650         do
5651           {
5652           byte = *bytes++;
5653           SLJIT_ASSERT((chr & 0x7) == 0);
5654           if (byte == 0)
5655             chr += 8;
5656           else
5657             {
5658             do
5659               {
5660               if ((byte & 0x1) != 0)
5661                 add_prefix_char(chr, chars, TRUE);
5662               byte >>= 1;
5663               chr++;
5664               }
5665             while (byte != 0);
5666             chr = (chr + 7) & ~7;
5667             }
5668           }
5669         while (chars->count != 255 && bytes < bytes_end);
5670         bytes = bytes_end - 32;
5671         }
5672 
5673       consumed++;
5674       if (--max_chars == 0)
5675         return consumed;
5676       chars++;
5677       }
5678     while (--repeat > 0);
5679 
5680     switch (*cc)
5681       {
5682       case OP_CRSTAR:
5683       case OP_CRMINSTAR:
5684       case OP_CRPOSSTAR:
5685       return consumed;
5686 
5687       case OP_CRQUERY:
5688       case OP_CRMINQUERY:
5689       case OP_CRPOSQUERY:
5690       cc++;
5691       break;
5692 
5693       case OP_CRRANGE:
5694       case OP_CRMINRANGE:
5695       case OP_CRPOSRANGE:
5696       if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
5697         return consumed;
5698       cc += 1 + 2 * IMM2_SIZE;
5699       break;
5700       }
5701 
5702     repeat = 1;
5703     continue;
5704     }
5705 
5706   len = 1;
5707 #ifdef SUPPORT_UNICODE
5708   if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
5709 #endif
5710 
5711   if (caseless && char_has_othercase(common, cc))
5712     {
5713 #ifdef SUPPORT_UNICODE
5714     if (common->utf)
5715       {
5716       GETCHAR(chr, cc);
5717       if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
5718         return consumed;
5719       }
5720     else
5721 #endif
5722       {
5723       chr = *cc;
5724 #ifdef SUPPORT_UNICODE
5725       if (common->ucp && chr > 127)
5726         othercase[0] = UCD_OTHERCASE(chr);
5727       else
5728 #endif
5729         othercase[0] = TABLE_GET(chr, common->fcc, chr);
5730       }
5731     }
5732   else
5733     {
5734     caseless = FALSE;
5735     othercase[0] = 0; /* Stops compiler warning - PH */
5736     }
5737 
5738   len_save = len;
5739   cc_save = cc;
5740   while (TRUE)
5741     {
5742     oc = othercase;
5743     do
5744       {
5745       len--;
5746       consumed++;
5747 
5748       chr = *cc;
5749       add_prefix_char(*cc, chars, len == 0);
5750 
5751       if (caseless)
5752         add_prefix_char(*oc, chars, len == 0);
5753 
5754       if (--max_chars == 0)
5755         return consumed;
5756       chars++;
5757       cc++;
5758       oc++;
5759       }
5760     while (len > 0);
5761 
5762     if (--repeat == 0)
5763       break;
5764 
5765     len = len_save;
5766     cc = cc_save;
5767     }
5768 
5769   repeat = 1;
5770   if (last)
5771     return consumed;
5772   }
5773 }
5774 
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
/* Emits a test on the code unit held in "reg" and branches to "label" when
that unit is not the first unit of a character: in 8-bit builds a byte whose
top two bits are 10 (a UTF-8 continuation byte), in 16-bit builds a unit in
the 0xdc00..0xdfff range (a UTF-16 low surrogate).

  compiler   the sljit compiler the instructions are appended to
  reg        register holding the code unit; it is masked in place, so its
             previous content is lost
  label      branch target taken when the unit is not a character start */
static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
{
/* Only the bit patterns differ between the code unit widths. */
#if PCRE2_CODE_UNIT_WIDTH == 8
const int kind_mask = 0xc0;
const int non_start_kind = 0x80;
#elif PCRE2_CODE_UNIT_WIDTH == 16
const int kind_mask = 0xfc00;
const int non_start_kind = 0xdc00;
#else
#error "Unknown code width"
#endif

OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, kind_mask);
CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, non_start_kind, label);
}
#endif
5789 
5790 #include "pcre2_jit_simd_inc.h"
5791 
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD

/* Picks the best pair of prefix positions for the SIMD two-position search
and, when one exists, emits that search.

  common   compiler state, forwarded to fast_forward_char_pair_simd()
  chars    per-position prefix data; last_count must already contain the
           priority computed by fast_forward_first_n_chars()
  max      number of valid entries in chars

A position is eligible when its priority is above 2. With the priority scheme
used by the caller this implies the position has one or two candidate
characters, so both chars[0] and chars[1] are meaningful. Positions i > j form
a candidate pair when j is at most max_fast_forward_char_pair_offset() before
i and the two positions have no candidate character in common. The pair with
the largest priority sum wins; since the test is ">=", a pair visited later
replaces an earlier pair with the same sum.

Returns TRUE when code was emitted, FALSE (nothing emitted) otherwise. */
static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
{
sljit_s32 i, j, max_i = 0, max_j = 0;
sljit_u32 max_pri = 0;
/* The distance limit does not depend on the loop state, so it is fetched
once. NOTE(review): assumes max_fast_forward_char_pair_offset() is a pure
query; it is defined outside this section. */
sljit_s32 max_offset = max_fast_forward_char_pair_offset();
PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri;

for (i = max - 1; i >= 1; i--)
  {
  if (chars[i].last_count <= 2)
    continue;

  a1 = chars[i].chars[0];
  a2 = chars[i].chars[1];
  a_pri = chars[i].last_count;

  /* Earliest partner position that is still within reach of i. */
  j = i - max_offset;
  if (j < 0)
    j = 0;

  for (; j < i; j++)
    {
    b_pri = chars[j].last_count;

    /* The sum is formed as unsigned so the comparison with max_pri does not
    mix signed and unsigned operands. */
    if (b_pri <= 2 || (sljit_u32)a_pri + (sljit_u32)b_pri < max_pri)
      continue;

    b1 = chars[j].chars[0];
    b2 = chars[j].chars[1];

    /* The two positions must not share any candidate character. */
    if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)
      {
      max_pri = (sljit_u32)a_pri + (sljit_u32)b_pri;
      max_i = i;
      max_j = j;
      }
    }
  }

if (max_pri == 0)
  return FALSE;

fast_forward_char_pair_simd(common, max_i, chars[max_i].chars[0], chars[max_i].chars[1], max_j, chars[max_j].chars[0], chars[max_j].chars[1]);
return TRUE;
}

#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
5840 
/* Emits code that moves STR_PTR forward to the next position at which the
code unit "offset" units further on equals char1 or char2.

  common   compiler state
  char1    first accepted code unit
  char2    second accepted code unit (may be the same as char1)
  offset   distance, in code units, between the start position and the unit
           that is tested; must be 0 outside PCRE2_JIT_COMPLETE mode

When common->match_end_ptr is set, STR_END is temporarily lowered to the
smaller of itself and (saved match end + offset + 1); the original value is
kept in TMP3 and put back before the emitted code falls through. In
PCRE2_JIT_COMPLETE mode running out of input jumps to common->failed_match;
in the other modes it simply leaves the search. */
static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *scan_loop;
struct sljit_jump *hit;
struct sljit_jump *out_of_input;
PCRE2_UCHAR diff;
BOOL clamp_end = (common->match_end_ptr != 0);

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);

if (clamp_end)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

  /* Scan at the tested unit rather than at the start position. */
  if (offset > 0)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

  /* STR_END = min(STR_END, match end + offset + 1); TMP3 keeps the original. */
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }
else if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD

if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
  {
  fast_forward_char_simd(common, char1, char2, offset);

  /* Undo the initial shift so STR_PTR is a start position again. */
  if (offset > 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

  if (clamp_end)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

scan_loop = LABEL();

out_of_input = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, out_of_input);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

diff = char1 ^ char2;
if (diff == 0)
  {
  /* Only one character is accepted. */
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, scan_loop);
  }
else if (is_powerof2(diff))
  {
  /* The characters differ in one bit: force that bit and compare once. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, diff);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | diff, scan_loop);
  }
else
  {
  hit = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, scan_loop);
  JUMPHERE(hit);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  /* The start position itself must be the first unit of a character. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
  jumpto_if_not_utf_char_start(compiler, TMP1, scan_loop);
  }
#endif

/* Step back over the tested unit and the initial shift. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(out_of_input);

if (clamp_end)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
5926 
fast_forward_first_n_chars(compiler_common * common)5927 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
5928 {
5929 DEFINE_COMPILER;
5930 struct sljit_label *start;
5931 struct sljit_jump *match;
5932 fast_forward_char_data chars[MAX_N_CHARS];
5933 sljit_s32 offset;
5934 PCRE2_UCHAR mask;
5935 PCRE2_UCHAR *char_set, *char_set_end;
5936 int i, max, from;
5937 int range_right = -1, range_len;
5938 sljit_u8 *update_table = NULL;
5939 BOOL in_range;
5940 sljit_u32 rec_count;
5941 
5942 for (i = 0; i < MAX_N_CHARS; i++)
5943   {
5944   chars[i].count = 0;
5945   chars[i].last_count = 0;
5946   }
5947 
5948 rec_count = 10000;
5949 max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
5950 
5951 if (max < 1)
5952   return FALSE;
5953 
5954 /* Convert last_count to priority. */
5955 for (i = 0; i < max; i++)
5956   {
5957   SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count);
5958 
5959   if (chars[i].count == 1)
5960     {
5961     chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
5962     /* Simplifies algorithms later. */
5963     chars[i].chars[1] = chars[i].chars[0];
5964     }
5965   else if (chars[i].count == 2)
5966     {
5967     SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);
5968 
5969     if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
5970       chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
5971     else
5972       chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
5973     }
5974   else
5975     chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
5976   }
5977 
5978 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
5979 if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
5980   return TRUE;
5981 #endif
5982 
5983 in_range = FALSE;
5984 /* Prevent compiler "uninitialized" warning */
5985 from = 0;
5986 range_len = 4 /* minimum length */ - 1;
5987 for (i = 0; i <= max; i++)
5988   {
5989   if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
5990     {
5991     range_len = i - from;
5992     range_right = i - 1;
5993     }
5994 
5995   if (i < max && chars[i].count < 255)
5996     {
5997     SLJIT_ASSERT(chars[i].count > 0);
5998     if (!in_range)
5999       {
6000       in_range = TRUE;
6001       from = i;
6002       }
6003     }
6004   else
6005     in_range = FALSE;
6006   }
6007 
6008 if (range_right >= 0)
6009   {
6010   update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
6011   if (update_table == NULL)
6012     return TRUE;
6013   memset(update_table, IN_UCHARS(range_len), 256);
6014 
6015   for (i = 0; i < range_len; i++)
6016     {
6017     SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);
6018 
6019     char_set = chars[range_right - i].chars;
6020     char_set_end = char_set + chars[range_right - i].count;
6021     do
6022       {
6023       if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
6024         update_table[(*char_set) & 0xff] = IN_UCHARS(i);
6025       char_set++;
6026       }
6027     while (char_set < char_set_end);
6028     }
6029   }
6030 
6031 offset = -1;
6032 /* Scan forward. */
6033 for (i = 0; i < max; i++)
6034   {
6035   if (range_right == i)
6036     continue;
6037 
6038   if (offset == -1)
6039     {
6040     if (chars[i].last_count >= 2)
6041       offset = i;
6042     }
6043   else if (chars[offset].last_count < chars[i].last_count)
6044     offset = i;
6045   }
6046 
6047 SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));
6048 
6049 if (range_right < 0)
6050   {
6051   if (offset < 0)
6052     return FALSE;
6053   /* Works regardless the value is 1 or 2. */
6054   fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
6055   return TRUE;
6056   }
6057 
6058 SLJIT_ASSERT(range_right != offset);
6059 
6060 if (common->match_end_ptr != 0)
6061   {
6062   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6063   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6064   OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6065   add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6066   OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
6067   CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
6068   }
6069 else
6070   {
6071   OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6072   add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6073   }
6074 
6075 SLJIT_ASSERT(range_right >= 0);
6076 
6077 if (!HAS_VIRTUAL_REGISTERS)
6078   OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
6079 
6080 start = LABEL();
6081 add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6082 
6083 #if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
6084 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
6085 #else
6086 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
6087 #endif
6088 
6089 if (!HAS_VIRTUAL_REGISTERS)
6090   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
6091 else
6092   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
6093 
6094 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6095 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
6096 
6097 if (offset >= 0)
6098   {
6099   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
6100   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6101 
6102   if (chars[offset].count == 1)
6103     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
6104   else
6105     {
6106     mask = chars[offset].chars[0] ^ chars[offset].chars[1];
6107     if (is_powerof2(mask))
6108       {
6109       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
6110       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
6111       }
6112     else
6113       {
6114       match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
6115       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
6116       JUMPHERE(match);
6117       }
6118     }
6119   }
6120 
6121 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6122 if (common->utf && offset != 0)
6123   {
6124   if (offset < 0)
6125     {
6126     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6127     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6128     }
6129   else
6130     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6131 
6132   jumpto_if_not_utf_char_start(compiler, TMP1, start);
6133 
6134   if (offset < 0)
6135     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6136   }
6137 #endif
6138 
6139 if (offset >= 0)
6140   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6141 
6142 if (common->match_end_ptr != 0)
6143   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6144 else
6145   OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6146 return TRUE;
6147 }
6148 
fast_forward_first_char(compiler_common * common)6149 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
6150 {
6151 PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
6152 PCRE2_UCHAR oc;
6153 
6154 oc = first_char;
6155 if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
6156   {
6157   oc = TABLE_GET(first_char, common->fcc, first_char);
6158 #if defined SUPPORT_UNICODE
6159   if (first_char > 127 && (common->utf || common->ucp))
6160     oc = UCD_OTHERCASE(first_char);
6161 #endif
6162   }
6163 
6164 fast_forward_first_char2(common, first_char, oc, 0);
6165 }
6166 
fast_forward_newline(compiler_common * common)6167 static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
6168 {
6169 DEFINE_COMPILER;
6170 struct sljit_label *loop;
6171 struct sljit_jump *lastchar = NULL;
6172 struct sljit_jump *firstchar;
6173 struct sljit_jump *quit = NULL;
6174 struct sljit_jump *foundcr = NULL;
6175 struct sljit_jump *notfoundnl;
6176 jump_list *newline = NULL;
6177 
6178 if (common->match_end_ptr != 0)
6179   {
6180   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6181   OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6182   }
6183 
6184 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6185   {
6186 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6187   if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
6188     {
6189     if (HAS_VIRTUAL_REGISTERS)
6190       {
6191       OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6192       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6193       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6194       }
6195     else
6196       {
6197       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6198       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6199       }
6200     firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6201 
6202     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6203     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
6204     OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
6205 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6206     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6207 #endif
6208     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6209 
6210     fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);
6211     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6212     }
6213   else
6214 #endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6215     {
6216     lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6217     if (HAS_VIRTUAL_REGISTERS)
6218       {
6219       OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6220       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6221       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6222       }
6223     else
6224       {
6225       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6226       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6227       }
6228     firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6229 
6230     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
6231     OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
6232     OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
6233 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6234     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
6235 #endif
6236     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6237 
6238     loop = LABEL();
6239     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6240     quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6241     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6242     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6243     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
6244     CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
6245 
6246     JUMPHERE(quit);
6247     JUMPHERE(lastchar);
6248     }
6249 
6250   JUMPHERE(firstchar);
6251 
6252   if (common->match_end_ptr != 0)
6253     OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6254   return;
6255   }
6256 
6257 if (HAS_VIRTUAL_REGISTERS)
6258   {
6259   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6260   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6261   }
6262 else
6263   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6264 
6265 /* Example: match /^/ to \r\n from offset 1. */
6266 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6267 
6268 if (common->nltype == NLTYPE_ANY)
6269   move_back(common, NULL, FALSE);
6270 else
6271   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6272 
6273 loop = LABEL();
6274 common->ff_newline_shortcut = loop;
6275 
6276 #ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
6277 if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
6278   {
6279   if (common->nltype == NLTYPE_ANYCRLF)
6280     {
6281     fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
6282     if (common->mode != PCRE2_JIT_COMPLETE)
6283       lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6284 
6285     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6286     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6287     quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6288     }
6289    else
6290     {
6291     fast_forward_char_simd(common, common->newline, common->newline, 0);
6292 
6293     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6294     if (common->mode != PCRE2_JIT_COMPLETE)
6295       {
6296       OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
6297       CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
6298       }
6299     }
6300   }
6301 else
6302 #endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
6303   {
6304   read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
6305   lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6306   if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6307     foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6308   check_newlinechar(common, common->nltype, &newline, FALSE);
6309   set_jumps(newline, loop);
6310   }
6311 
6312 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6313   {
6314   if (quit == NULL)
6315     {
6316     quit = JUMP(SLJIT_JUMP);
6317     JUMPHERE(foundcr);
6318     }
6319 
6320   notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6321   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6322   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
6323   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
6324 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6325   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6326 #endif
6327   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6328   JUMPHERE(notfoundnl);
6329   JUMPHERE(quit);
6330   }
6331 
6332 if (lastchar)
6333   JUMPHERE(lastchar);
6334 JUMPHERE(firstchar);
6335 
6336 if (common->match_end_ptr != 0)
6337   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6338 }
6339 
6340 static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6341 
fast_forward_start_bits(compiler_common * common)6342 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
6343 {
6344 DEFINE_COMPILER;
6345 const sljit_u8 *start_bits = common->re->start_bitmap;
6346 struct sljit_label *start;
6347 struct sljit_jump *partial_quit;
6348 #if PCRE2_CODE_UNIT_WIDTH != 8
6349 struct sljit_jump *found = NULL;
6350 #endif
6351 jump_list *matches = NULL;
6352 
6353 if (common->match_end_ptr != 0)
6354   {
6355   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6356   OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
6357   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6358   OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
6359   CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
6360   }
6361 
6362 start = LABEL();
6363 
6364 partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6365 if (common->mode == PCRE2_JIT_COMPLETE)
6366   add_jump(compiler, &common->failed_match, partial_quit);
6367 
6368 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6369 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6370 
6371 if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
6372   {
6373 #if PCRE2_CODE_UNIT_WIDTH != 8
6374   if ((start_bits[31] & 0x80) != 0)
6375     found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
6376   else
6377     CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
6378 #elif defined SUPPORT_UNICODE
6379   if (common->utf && is_char7_bitset(start_bits, FALSE))
6380     CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
6381 #endif
6382   OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6383   OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6384   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
6385   if (!HAS_VIRTUAL_REGISTERS)
6386     {
6387     OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
6388     OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP3, 0);
6389     }
6390   else
6391     {
6392     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6393     OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6394     }
6395   JUMPTO(SLJIT_ZERO, start);
6396   }
6397 else
6398   set_jumps(matches, start);
6399 
6400 #if PCRE2_CODE_UNIT_WIDTH != 8
6401 if (found != NULL)
6402   JUMPHERE(found);
6403 #endif
6404 
6405 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6406 
6407 if (common->mode != PCRE2_JIT_COMPLETE)
6408   JUMPHERE(partial_quit);
6409 
6410 if (common->match_end_ptr != 0)
6411   OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
6412 }
6413 
search_requested_char(compiler_common * common,PCRE2_UCHAR req_char,BOOL caseless,BOOL has_firstchar)6414 static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
6415 {
6416 DEFINE_COMPILER;
6417 struct sljit_label *loop;
6418 struct sljit_jump *toolong;
6419 struct sljit_jump *already_found;
6420 struct sljit_jump *found;
6421 struct sljit_jump *found_oc = NULL;
6422 jump_list *not_found = NULL;
6423 sljit_u32 oc, bit;
6424 
6425 SLJIT_ASSERT(common->req_char_ptr != 0);
6426 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
6427 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
6428 toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
6429 already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);
6430 
6431 if (has_firstchar)
6432   OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6433 else
6434   OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
6435 
6436 oc = req_char;
6437 if (caseless)
6438   {
6439   oc = TABLE_GET(req_char, common->fcc, req_char);
6440 #if defined SUPPORT_UNICODE
6441   if (req_char > 127 && (common->utf || common->ucp))
6442     oc = UCD_OTHERCASE(req_char);
6443 #endif
6444   }
6445 
6446 #ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
6447 if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
6448   {
6449   not_found = fast_requested_char_simd(common, req_char, oc);
6450   }
6451 else
6452 #endif
6453   {
6454   loop = LABEL();
6455   add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
6456 
6457   OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
6458 
6459   if (req_char == oc)
6460     found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6461   else
6462     {
6463     bit = req_char ^ oc;
6464     if (is_powerof2(bit))
6465       {
6466        OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
6467       found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
6468       }
6469     else
6470       {
6471       found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6472       found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
6473       }
6474     }
6475   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6476   JUMPTO(SLJIT_JUMP, loop);
6477 
6478   JUMPHERE(found);
6479   if (found_oc)
6480     JUMPHERE(found_oc);
6481   }
6482 
6483 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
6484 
6485 JUMPHERE(already_found);
6486 JUMPHERE(toolong);
6487 return not_found;
6488 }
6489 
/* Emits the fast-called helper that restores saved values from the entries on
top of the machine stack.

The emitted loop reads the word just below STACK_TOP and acts on its sign:

  > 0   the word is an offset from the local base; the next two words are
        written to [base + offset] and [base + offset + sizeof(sljit_sw)],
        and three words are popped;
  == 0  the helper returns to its caller;
  < 0   the negated word is an offset from the local base; the next word is
        written to [base + offset], and two words are popped.

TMP1 holds the local base inside the loop. Without virtual registers TMP1
doubles as a scratch register in the two-value case and is reloaded there. */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *not_positive;
struct sljit_jump *negative;
struct sljit_label *next_entry;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
GET_LOCAL_BASE(TMP1, 0, 0);

/* Drop frames until we reach STACK_TOP. */
next_entry = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
not_positive = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);

/* Positive offset: restore a pair of values. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
  }
else
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  /* TMP1 was used as scratch above; make it the local base again. */
  GET_LOCAL_BASE(TMP1, 0, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, next_entry);

JUMPHERE(not_positive);
/* TMP2 is <= 0 here, so "not zero" means negative. */
negative = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
/* End of reverting values. */
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Negative offset: restore a single value. */
JUMPHERE(negative);
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
  }
else
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, next_entry);
}
6543 
/* Emits the word boundary test subroutine (fast-call entry, the return
address is saved in LOCALS0).

The generated code computes two 0/1 flags:
  TMP3  1 when the character before STR_PTR is a word character; it stays 0
        when STR_PTR is not greater than jit_arguments.begin.
  TMP2  1 when the character at STR_PTR is a word character; it stays 0 when
        check_str_end() reports the end of the subject.
The normal exit XORs TMP3 into TMP2 with SLJIT_SET_Z, so the zero flag is set
when both flags agree.

A word character is:
  - with common->ucp: CHAR_UNDERSCORE, or a character whose type (obtained via
    the common->getucdtype call) lies in ucp_Ll..ucp_Lu or ucp_Nd..ucp_No;
  - otherwise: a character whose common->ctypes entry has bit 4 (ctype_word)
    set; characters above 255 are skipped and keep the flag at 0.

When common->invalid_utf is set there are two extra exits: one returns with
TMP2 == -1 (both the backward read failed and the forward character is
INVALID_UTF_CHAR), the other returns with TMP2 == TMP3 (the forward read
failed). Neither of these sets the flags with the XOR. */
static void check_wordboundary(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#ifdef SUPPORT_UNICODE
struct sljit_label *valid_utf;
jump_list *invalid_utf1 = NULL;
#endif /* SUPPORT_UNICODE */
jump_list *invalid_utf2 = NULL;
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
struct sljit_jump *jump;
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */

/* The shift by 4 below relies on this value. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to TMP3. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
/* No previous character at the start of the subject: TMP3 stays 0. */
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* TMP1 and STR_PTR are preserved around the move_back() and
    check_start_used_ptr() calls. */
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
    }
  }
else
#endif /* SUPPORT_UNICODE */
  {
  if (common->mode == PCRE2_JIT_COMPLETE)
    peek_char_back(common, READ_CHAR_MAX, NULL);
  else
    {
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
    }
  }

/* Testing char type. */
#ifdef SUPPORT_UNICODE
if (common->ucp)
  {
  /* TMP2 is preset to 1 for the underscore shortcut. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  /* Range test: ucp_Ll..ucp_Lu. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  /* Range test: ucp_Nd..ucp_No. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
  }
else
#endif /* SUPPORT_UNICODE */
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  /* Here TMP3 has already been zeroed. */
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
JUMPHERE(skipread);

/* Now the character at STR_PTR; TMP2 stays 0 at the end of the subject. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);

/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UNICODE

/* Re-entry point used by the invalid_utf1 handler below. */
valid_utf = LABEL();

if (common->ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  }
else
#endif /* SUPPORT_UNICODE */
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
set_jumps(skipread_list, LABEL());

/* Normal exit: the zero flag is set when the two flags are equal. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  /* The backward read failed. */
  set_jumps(invalid_utf1, LABEL());

  /* If the forward character is readable, continue with TMP3 == 0. */
  peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

  /* The forward read failed: return the previous character's flag. */
  set_jumps(invalid_utf2, LABEL());
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
  }
#endif /* SUPPORT_UNICODE */
}
6704 
/* Tries to emit a short compare sequence for a character class bitmap whose
set bits form at most two contiguous runs.

Arguments:
  common      compiler state
  bits        class bitmap: bit (i & 7) of bits[i >> 3] belongs to character
              i, for i in 0..255
  nclass      used only to decide whether a closing boundary of 256 is
              appended (see below)
  invert      when set, the meaning of the first bit is inverted for the
              emitted comparisons
  backtracks  jump list that receives the emitted "no match" jumps

Returns TRUE when code was emitted (the character is expected in TMP1),
FALSE when the class needs more than four boundaries and nothing was emitted.

The bitmap is first converted into a list of boundaries: ranges[k] is the
index of a character whose bit differs from the bit of the previous character.
Bytes that consist entirely of the current bit value are skipped in one step. */
static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
DEFINE_COMPILER;
int ranges[MAX_CLASS_RANGE_SIZE];
sljit_u8 bit, cbit, all;
int i, byte, length = 0;

bit = bits[0] & 0x1;
/* All bits will be zero or one (since bit is zero or one). */
all = -bit;

for (i = 0; i < 256; )
  {
  byte = i >> 3;
  if ((i & 0x7) == 0 && bits[byte] == all)
    i += 8;
  else
    {
    cbit = (bits[byte] >> (i & 0x7)) & 0x1;
    if (cbit != bit)
      {
      if (length >= MAX_CLASS_RANGE_SIZE)
        return FALSE;
      ranges[length] = i;
      length++;
      bit = cbit;
      all = -cbit;
      }
    i++;
    }
  }

/* Depending on the last bit and nclass, character 256 is one more boundary. */
if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
  {
  if (length >= MAX_CLASS_RANGE_SIZE)
    return FALSE;
  ranges[length] = 256;
  length++;
  }

/* Only up to four boundaries are handled by the switch below. */
if (length < 0 || length > 4)
  return FALSE;

bit = bits[0] & 0x1;
if (invert) bit ^= 0x1;

/* No character is accepted. */
if (length == 0 && bit == 0)
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

switch(length)
  {
  case 0:
  /* When bit != 0, all characters are accepted. */
  return TRUE;

  case 1:
  /* One boundary: a single unsigned comparison. */
  add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 2:
  /* One run [ranges[0], ranges[1]): subtract and compare, or a plain
  equality test when the run has a single character. */
  if (ranges[0] + 1 != ranges[1])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
    }
  else
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 3:
  if (bit != 0)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
    return TRUE;
    }

  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
  return TRUE;

  case 4:
  /* Two runs of equal length whose start positions differ in exactly one
  bit: OR that bit into the character and test the upper run only. */
  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
      && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
      && (ranges[1] & (ranges[2] - ranges[0])) == 0
      && is_powerof2(ranges[2] - ranges[0]))
    {
    SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    return TRUE;
    }

  if (bit != 0)
    {
    /* i tracks the amount already subtracted from TMP1. */
    i = 0;
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      i = ranges[0];
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));

    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
    return TRUE;
    }

  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
  return TRUE;

  default:
  SLJIT_UNREACHABLE();
  return FALSE;
  }
}
6856 
/* Tries to emit a branchless test for a class that contains only a few
individual characters, using conditional moves.

Arguments:
  common      compiler state
  bits        32 byte class bitmap (bit j of bits[i] is character i * 8 + j)
  nclass      when set, the complement of the bitmap is enumerated and the
              sense of the final comparison is reversed
  invert      reverses the sense of the final comparison once more
  backtracks  jump list that receives the emitted "no match" jump

Returns TRUE when code was emitted (the character is expected in TMP1 and
TMP2 is used as the accumulator), FALSE when SLJIT_HAS_CMOV is unavailable,
when more than MAX_CLASS_CHARS_SIZE entries are needed, or when the
enumeration is empty.

Characters are enumerated in increasing order. When both c - 0x20 and c are
present (c having bit 0x20 set), they share one entry: the stored value
becomes c with the 0x100 marker bit. Such pairs are tested after OR-ing 0x20
into TMP1. */
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
DEFINE_COMPILER;
uint16_t char_list[MAX_CLASS_CHARS_SIZE];
uint8_t byte;
sljit_s32 type;
int i, j, k, len, c;

if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  return FALSE;

len = 0;

for (i = 0; i < 32; i++)
  {
  byte = bits[i];

  if (nclass)
    byte = ~byte;

  j = 0;
  while (byte != 0)
    {
    if (byte & 0x1)
      {
      c = i * 8 + j;

      /* k == len afterwards means that c needs its own entry. */
      k = len;

      if ((c & 0x20) != 0)
        {
        for (k = 0; k < len; k++)
          if (char_list[k] == c - 0x20)
            {
            /* Merge with the partner: value becomes c | 0x100. */
            char_list[k] |= 0x120;
            break;
            }
        }

      if (k == len)
        {
        if (len >= MAX_CLASS_CHARS_SIZE)
          return FALSE;

        char_list[len++] = (uint16_t) c;
        }
      }

    byte >>= 1;
    j++;
    }
  }

if (len == 0) return FALSE;  /* Should never occur, but stops analyzers complaining. */

i = 0;
j = 0;

/* TMP2 becomes non-zero when TMP1 matches an entry. The conditional moves
below copy the (non-zero) character itself, so a lone zero character needs
the flag based form. */
if (char_list[0] == 0)
  {
  i++;
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);

/* Unpaired entries first; j counts the paired ones. */
while (i < len)
  {
  if ((char_list[i] & 0x100) != 0)
    j++;
  else
    {
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[i]);
    CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
    }
  i++;
  }

if (j != 0)
  {
  /* Fold both members of each pair onto the one with bit 0x20 set. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);

  for (i = 0; i < len; i++)
    if ((char_list[i] & 0x100) != 0)
      {
      j--;
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff);
      CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
      }
  }

if (invert)
  nclass = !nclass;

type = nclass ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
add_jump(compiler, backtracks, CMP(type, TMP2, 0, SLJIT_IMM, 0));
return TRUE;
}
6957 
/* Tries the available class optimizations in order: first the range based
form, then the individual character form. Returns TRUE as soon as one of them
has emitted code, FALSE when neither applies. The arguments are passed through
unchanged to both helpers. */
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
if (optimize_class_ranges(common, bits, nclass, invert, backtracks))
  return TRUE;
return optimize_class_chars(common, bits, nclass, invert, backtracks);
}
6965 
/* Emits the "any newline" test subroutine (fast-call entry, the return
address is kept in RETURN_ADDR). The generated code accumulates the result in
TMP2 and sets the zero flag from it with the last OR: TMP2 is non-zero when
the character in TMP1 is 0x0a..0x0d or 0x85, and, in 16/32 bit mode or when
common->utf is set, also 0x2028 or 0x2029. TMP1 is modified. */
static void check_anynewline(compiler_common *common)
{
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* 0x0a..0x0d as one unsigned range test. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit maps 0x2028 onto 0x2029 (both biased by 0x0a). */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
6992 
/* Emits the horizontal space test subroutine (fast-call entry, the return
address is kept in RETURN_ADDR). The generated code accumulates the result in
TMP2 and sets the zero flag from it with the last OR: TMP2 is non-zero when
the character in TMP1 is 0x09, 0x20 or 0xa0, and, in 16/32 bit mode or when
common->utf is set, also 0x1680, 0x180e, 0x2000..0x200a, 0x202f, 0x205f or
0x3000. TMP1 is modified in the latter case. */
static void check_hspace(compiler_common *common)
{
/* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* From here on TMP1 is biased by 0x2000. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7031 
/* Emits the vertical space test subroutine (fast-call entry, the return
address is kept in RETURN_ADDR). The generated code accumulates the result in
TMP2 and sets the zero flag from it with the last OR: TMP2 is non-zero when
the character in TMP1 is 0x0a..0x0d or 0x85, and, in 16/32 bit mode or when
common->utf is set, also 0x2028 or 0x2029. TMP1 is modified. */
static void check_vspace(compiler_common *common)
{
/* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* 0x0a..0x0d as one unsigned range test. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit maps 0x2028 onto 0x2029 (both biased by 0x0a). */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7059 
/* Emits the case sensitive compare subroutine (fast-call entry, the return
address is saved in LOCALS0).

On entry of the generated code TMP1 points to the first string, TMP2 holds
the length (it is decreased by IN_UCHARS(1) per character, so it is a byte
count) and STR_PTR is first moved back by TMP2 to reach the second string.
The loop compares one code unit per iteration and stops at the first
difference or when TMP2 reaches zero. Before returning, TMP1 is overwritten
with the return address.
NOTE(review): callers presumably inspect TMP2 or the flags of the last
operation to detect a mismatch - confirm at the call sites.

Three loop shapes are generated depending on what sljit_emit_mem() reports as
supported: post-update addressing, pre-update addressing (the pointers are
biased by one character before the loop and STR_PTR is corrected after it),
or plain loads followed by explicit pointer increments.

With virtual registers, STR_END and STACK_TOP serve as the character
registers; their values are parked in TMP3 and RETURN_ADDR for the duration
of the loop. */
static void do_casefulcmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;
int char1_reg;
int char2_reg;

if (HAS_VIRTUAL_REGISTERS)
  {
  char1_reg = STR_END;
  char2_reg = STACK_TOP;
  }
else
  {
  char1_reg = TMP3;
  char2_reg = RETURN_ADDR;
  }

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

if (char1_reg == STR_END)
  {
  /* Save the registers which are borrowed as character registers. */
  OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
  }

if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  {
  /* Post-update form. */
  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_NOT_ZERO, label);

  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  }
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  {
  /* Pre-update form: start one character before the data. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_NOT_ZERO, label);

  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  /* Undo the initial bias of STR_PTR. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }
else
  {
  /* Generic form: load, then advance both pointers. */
  label = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_NOT_ZERO, label);

  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  }

if (char1_reg == STR_END)
  {
  /* Restore the borrowed registers. */
  OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
  }

OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7139 
/* Emits the caseless compare subroutine (fast-call entry, the return address
is saved in LOCALS0).

The register contract mirrors the loop itself: TMP1 points to the first
string, TMP2 is the remaining length (decreased by IN_UCHARS(1) per
character), and STR_PTR is first moved back by TMP2. Each loaded character is
replaced by its entry in the common->lcc table before the comparison; when
the code unit width is not 8, characters above 255 skip the table lookup and
are compared unchanged. The loop stops at the first difference or when TMP2
reaches zero, and TMP1 is overwritten with the return address before
returning.
NOTE(review): callers presumably inspect TMP2 or the flags of the last
operation to detect a mismatch - confirm at the call sites.

opt_type selects the loop shape: 1 = post-update addressing, 2 = pre-update
addressing, 0 = plain loads with explicit pointer increments.

STR_END is always borrowed as the first character register and is saved in
LOCALS1. With virtual registers, STACK_TOP and STACK_LIMIT are borrowed as
well (second character and table base) and are parked in TMP3 and
RETURN_ADDR. */
static void do_caselesscmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;
int char1_reg = STR_END;
int char2_reg;
int lcc_table;
int opt_type = 0;

if (HAS_VIRTUAL_REGISTERS)
  {
  char2_reg = STACK_TOP;
  lcc_table = STACK_LIMIT;
  }
else
  {
  char2_reg = RETURN_ADDR;
  lcc_table = TMP3;
  }

/* Probe which update form is supported; nothing is emitted here because of
SLJIT_MEM_SUPP. */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  opt_type = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  opt_type = 2;

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);

if (char2_reg == STACK_TOP)
  {
  /* Save the registers which are borrowed below. */
  OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
  }

OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);

if (opt_type == 1)
  {
  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  }
else if (opt_type == 2)
  {
  /* Pre-update form: start one character before the data. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  }
else
  {
  /* STR_PTR is advanced later, after the table lookups. */
  label = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  }

/* Map both characters through the lcc table (only values up to 255). */
#if PCRE2_CODE_UNIT_WIDTH != 8
jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif

if (opt_type == 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);

JUMPHERE(jump);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

/* Undo the initial bias of STR_PTR used by the pre-update form. */
if (opt_type == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char2_reg == STACK_TOP)
  {
  /* Restore the borrowed registers. */
  OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
  }

OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7237 
/* Emits the comparison of one pattern character (all of its code units in
UTF mode) against the subject, as part of a fixed length character sequence.

Arguments:
  common      compiler state
  caseless    when set and the character has an other case, the code unit
              that differs is compared after OR-ing in the differing bit
  cc          points to the pattern character
  context     progress of the whole sequence: length is the number of bytes
              still to be compared (subject data is addressed as
              STR_PTR - length), sourcereg is the register that receives the
              next load (-1 before the first call), ucharptr/c/oc collect
              code units for a combined comparison
  backtracks  jump list that receives the emitted "no match" jumps

Returns the pointer to the code unit following the character.

When unaligned reads are available (8 and 16 bit code units only), code units
are gathered in context->c (expected value) and context->oc (bits to OR into
the subject data) and compared 4, 2 or 1 bytes at a time. Otherwise each code
unit is compared individually. In both cases the load of the next chunk is
emitted before the comparison of the current one, alternating between TMP1
and TMP2. */
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
    compare_context *context, jump_list **backtracks)
{
DEFINE_COMPILER;
unsigned int othercasebit = 0;
PCRE2_SPTR othercasechar = NULL;
#ifdef SUPPORT_UNICODE
int utflength;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  othercasebit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(othercasebit);
  /* Extracting bit difference info. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  /* The upper part selects the code unit, the low byte is the bit mask. */
  othercasechar = cc + (othercasebit >> 8);
  othercasebit &= 0xff;
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for PCRE2_CODE_UNIT_WIDTH == 32. */
  othercasechar = cc + (othercasebit >> 9);
  /* Bit 0x100 tells that the mask belongs to the upper byte. */
  if ((othercasebit & 0x100) != 0)
    othercasebit = (othercasebit & 0xff) << 8;
  else
    othercasebit &= 0xff;
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  }

if (context->sourcereg == -1)
  {
  /* First call for this sequence: emit the initial load into TMP1. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UNICODE
utflength = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  utflength += GET_EXTRALEN(*cc);

do
  {
#endif

  context->length -= IN_UCHARS(1);
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)

  /* Unaligned read is supported. */
  if (othercasebit != 0 && othercasechar == cc)
    {
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
    context->oc.asuchars[context->ucharptr] = othercasebit;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  context->ucharptr++;

  /* Flush when a full chunk is gathered or the sequence ends. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Load the next chunk first, then switch to the register which holds
    the current one. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if PCRE2_CODE_UNIT_WIDTH == 8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

    switch(context->ucharptr)
      {
      case 4 / sizeof(PCRE2_UCHAR):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(PCRE2_UCHAR):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#if PCRE2_CODE_UNIT_WIDTH == 8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported or in 32 bit mode. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

  if (othercasebit != 0 && othercasechar == cc)
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));

#endif

  cc++;
#ifdef SUPPORT_UNICODE
  utflength--;
  }
while (utflength > 0);
#endif

return cc;
}
7390 
7391 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
7392 
/* Rebases the emitted type register so that it holds (type - value).

The macro relies on two names from the expansion site: typereg (the register
operand that currently holds the type minus typeoffset) and typeoffset (the
bias currently applied to it). When the requested bias differs, one ADD or SUB
instruction is emitted to correct the register, and typeoffset is updated.

The argument is evaluated more than once, so it must have no side effects.
The body is wrapped in do { } while (0) so that the macro behaves as a single
statement, e.g. as the body of an unbraced if / else. */

#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)
7402 
/* Rebases TMP1 so that it holds (character - value).

The macro relies on charoffset from the expansion site: the bias currently
subtracted from the character held in TMP1. When the requested bias differs,
one ADD or SUB instruction is emitted to correct TMP1, and charoffset is
updated.

The argument is evaluated more than once, so it must have no side effects.
The body is wrapped in do { } while (0) so that the macro behaves as a single
statement, e.g. as the body of an unbraced if / else. */

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
7412 
7413 static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
7414 
/* Emits the matching path for an extended character class (XCLASS).

Arguments:
  common      the JIT compiler state
  cc          points to the XCLASS flag unit (XCL_NOT, XCL_MAP, XCL_HASPROP
              bits). When XCL_MAP is set, a 32 byte bitmap follows. After that
              comes a list of XCL_SINGLE, XCL_RANGE, XCL_PROP and XCL_NOTPROP
              items terminated by XCL_END.
  backtracks  jump list that receives every jump taken when the subject
              character is rejected by the class

The function works in three phases:
  1. Scan the item list to find the [min, max] character range of interest
     and which Unicode record fields (type, script) and/or the character
     itself are needed.
  2. Emit code that reads the character, tests the optional bitmap, and loads
     the required Unicode data.
  3. Emit one comparison per item.

Jump routing: "list" is &found for a normal class and backtracks for a negated
one, so an item that matches jumps to list. The comparison of the last item
(compares == 0) is inverted for a normal class, so that a failure of the last
item goes to backtracks and a success falls through.

Register usage in the emitted code: TMP1 holds the character minus charoffset,
typereg (TMP1 or RETURN_ADDR) holds the character type minus typeoffset, TMP2
is scratch, and RETURN_ADDR is used to save the character or to hold the
type. */

static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
jump_list *found = NULL;
jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
struct sljit_jump *jump = NULL;
PCRE2_SPTR ccbegin;
int compares, invertcmp, numberofcmps;
#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
BOOL utf = common->utf;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */

#ifdef SUPPORT_UNICODE
BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
BOOL charsaved = FALSE;
int typereg = TMP1;
const sljit_u32 *other_cases;
sljit_uw typeoffset;
#endif /* SUPPORT_UNICODE */

/* Scanning the necessary info. */
cc++;
ccbegin = cc;
compares = 0;

if (cc[-1] & XCL_MAP)
  {
  /* The bitmap can match any character below 256. */
  min = 0;
  cc += 32 / sizeof(PCRE2_UCHAR);
  }

while (*cc != XCL_END)
  {
  compares++;
  if (*cc == XCL_SINGLE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
    if (c < min) min = c;
#ifdef SUPPORT_UNICODE
    needschar = TRUE;
#endif /* SUPPORT_UNICODE */
    }
  else if (*cc == XCL_RANGE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);
    if (c < min) min = c;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
#ifdef SUPPORT_UNICODE
    needschar = TRUE;
#endif /* SUPPORT_UNICODE */
    }
#ifdef SUPPORT_UNICODE
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    cc++;
    if (*cc == PT_CLIST)
      {
      /* A caseless set only widens the range by its own members. */
      other_cases = PRIV(ucd_caseless_sets) + cc[1];
      while (*other_cases != NOTACHAR)
        {
        if (*other_cases > max) max = *other_cases;
        if (*other_cases < min) min = *other_cases;
        other_cases++;
        }
      }
    else
      {
      /* Any other property may apply to every character. */
      max = READ_CHAR_MAX;
      min = 0;
      }

    switch(*cc)
      {
      case PT_ANY:
      /* PT_ANY either accepts everything or is ignored. */
      if (cc[-1] == XCL_PROP)
        {
        compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
        if (list == backtracks)
          add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
        return;
        }
      break;

      case PT_LAMP:
      case PT_GC:
      case PT_PC:
      case PT_ALNUM:
      needstype = TRUE;
      break;

      case PT_SC:
      needsscript = TRUE;
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      case PT_WORD:
      case PT_PXGRAPH:
      case PT_PXPRINT:
      case PT_PXPUNCT:
      needstype = TRUE;
      needschar = TRUE;
      break;

      case PT_CLIST:
      case PT_UCNC:
      needschar = TRUE;
      break;

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    cc += 2;
    }
#endif /* SUPPORT_UNICODE */
  }
SLJIT_ASSERT(compares > 0);

/* We are not necessarily in UTF mode, even in 8 bit mode. */
cc = ccbegin;
if ((cc[-1] & XCL_NOT) != 0)
  read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
else
  {
#ifdef SUPPORT_UNICODE
  read_char(common, min, max, (needstype || needsscript) ? backtracks : NULL, 0);
#else /* !SUPPORT_UNICODE */
  read_char(common, min, max, NULL, 0);
#endif /* SUPPORT_UNICODE */
  }

if ((cc[-1] & XCL_HASPROP) == 0)
  {
  if ((cc[-1] & XCL_MAP) != 0)
    {
    /* Characters above 255 skip the bitmap and go to the item list. */
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
    if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
      {
      /* Test bit (c & 7) of bitmap byte (c >> 3). */
      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
      OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
      add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
      }

    /* A character below 256 that is not in the bitmap is rejected. */
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(jump);

    cc += 32 / sizeof(PCRE2_UCHAR);
    }
  else
    {
    /* Quick rejection (or acceptance for a negated class) of characters
    outside [min, max] using a single unsigned comparison. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
    add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
    }
  }
else if ((cc[-1] & XCL_MAP) != 0)
  {
  /* The bitmap test destroys TMP1, so the character is saved first. */
  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
#ifdef SUPPORT_UNICODE
  charsaved = TRUE;
#endif /* SUPPORT_UNICODE */
  if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
    {
#if PCRE2_CODE_UNIT_WIDTH == 8
    jump = NULL;
    if (common->utf)
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
      jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);

    /* Test bit (c & 7) of bitmap byte (c >> 3). */
    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
    add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));

#if PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf)
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
      JUMPHERE(jump);
    }

  OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
  cc += 32 / sizeof(PCRE2_UCHAR);
  }

#ifdef SUPPORT_UNICODE
if (needstype || needsscript)
  {
  /* The record lookup below overwrites TMP1. */
  if (needschar && !charsaved)
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
  if (!common->utf)
    {
    /* Values above MAX_UTF_CODE_POINT are looked up as UNASSIGNED_UTF_CHAR. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
    JUMPHERE(jump);
    }
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */

  /* Two stage table lookup: the 16 bit ucd_stage1 entry selected by
  (c >> UCD_BLOCK_SHIFT) is combined with (c & UCD_BLOCK_MASK) to index the
  16 bit ucd_stage2 table. The result (the record index) is left in TMP2. */
  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

  /* Before anything else, we deal with scripts. */
  if (needsscript)
    {
    /* TMP1 = index * 8 + index * 4, the byte offset of the record
    (presumably sizeof(ucd_record) is 12). TMP2 keeps index * 4. */
    OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));

    ccbegin = cc;

    /* All script items are compiled here, while the script value is
    available in TMP1. The main loop below skips them. */
    while (*cc != XCL_END)
      {
      if (*cc == XCL_SINGLE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        }
      else if (*cc == XCL_RANGE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        GETCHARINCTEST(c, cc);
        }
      else
        {
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
        cc++;
        if (*cc == PT_SC)
          {
          compares--;
          invertcmp = (compares == 0 && list != backtracks);
          if (cc[-1] == XCL_NOTPROP)
            invertcmp ^= 0x1;
          jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
          add_jump(compiler, compares > 0 ? list : backtracks, jump);
          }
        cc += 2;
        }
      }

    cc = ccbegin;

    if (needstype)
      {
      /* TMP2 has already been shifted by 2 */
      if (!needschar)
        {
        OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

        OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
        }
      else
        {
        OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
        OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

        /* Restore the character to TMP1, and keep the type in RETURN_ADDR. */
        OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
        OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
        typereg = RETURN_ADDR;
        }
      }
    else if (needschar)
      OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
    }
  else if (needstype)
    {
    OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);

    if (!needschar)
      {
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
      }
    else
      {
      OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

      /* Restore the character to TMP1, and keep the type in RETURN_ADDR. */
      OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
      typereg = RETURN_ADDR;
      }
    }
  else if (needschar)
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
  }
#endif /* SUPPORT_UNICODE */

/* Generating code. */
charoffset = 0;
numberofcmps = 0;
#ifdef SUPPORT_UNICODE
typeoffset = 0;
#endif /* SUPPORT_UNICODE */

while (*cc != XCL_END)
  {
  compares--;
  invertcmp = (compares == 0 && list != backtracks);
  jump = NULL;

  if (*cc == XCL_SINGLE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);

    /* Up to four consecutive single / range tests are accumulated in TMP2
    as flags, and resolved by a single conditional jump. */
    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
  else if (*cc == XCL_RANGE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);
    /* Rebase TMP1 to the start of the range, so the range check becomes a
    single unsigned comparison against (end - start). */
    SET_CHAR_OFFSET(c);
    GETCHARINCTEST(c, cc);

    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
#ifdef SUPPORT_UNICODE
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    if (*cc == XCL_NOTPROP)
      invertcmp ^= 0x1;
    cc++;
    switch(*cc)
      {
      case PT_ANY:
      if (!invertcmp)
        jump = JUMP(SLJIT_JUMP);
      break;

      case PT_LAMP:
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_GC:
      c = PRIV(ucp_typerange)[(int)cc[1] * 2];
      SET_TYPE_OFFSET(c);
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
      break;

      case PT_PC:
      jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
      break;

      case PT_SC:
      /* Script items were compiled (and counted) earlier, so the decrement
      at the top of this loop is undone. */
      compares++;
      /* Do nothing. */
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      SET_CHAR_OFFSET(9);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      SET_TYPE_OFFSET(ucp_Zl);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_WORD:
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      /* Fall through. */

      case PT_ALNUM:
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
      /* For PT_WORD, TMP2 already holds the underscore test result. */
      OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      SET_TYPE_OFFSET(ucp_Nd);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_CLIST:
      other_cases = PRIV(ucd_caseless_sets) + cc[1];

      /* At least three characters are required.
         Otherwise this case would be handled by the normal code path. */
      SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
      SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);

      /* Optimizing character pairs, if their difference is power of 2.
      Two characters that differ in a single bit are tested together by
      setting that bit in the (un-biased) character and comparing the
      result with the larger member of the pair. */
      if (is_powerof2(other_cases[1] ^ other_cases[0]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
        else
          {
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
          }
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
        other_cases += 2;
        }
      else if (is_powerof2(other_cases[2] ^ other_cases[1]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
        else
          {
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          /* The mask must be the bit that distinguishes other_cases[1] from
          other_cases[2], exactly as in the charoffset == 0 path above. */
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
          }
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);

        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
        OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);

        other_cases += 3;
        }
      else
        {
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
        }

      /* The remaining members are tested one by one. Only the last test
      needs to set the zero flag for the jump below. */
      while (*other_cases != NOTACHAR)
        {
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
        }
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_UCNC:
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      SET_CHAR_OFFSET(0xa0);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      SET_CHAR_OFFSET(0);
      OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_PXGRAPH:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      SET_CHAR_OFFSET(0x2066);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      /* TMP2 is non-zero for characters that are NOT graphical. */
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPRINT:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      SET_CHAR_OFFSET(0x2066);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      /* TMP2 is non-zero for characters that are NOT printable. */
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPUNCT:
      SET_TYPE_OFFSET(ucp_Sc);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      /* Symbols only count as punctuation for characters up to 0x7f. */
      SET_CHAR_OFFSET(0);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);

      SET_TYPE_OFFSET(ucp_Pc);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    cc += 2;
    }
#endif /* SUPPORT_UNICODE */

  /* Every item except the last jumps to list on a match. The (possibly
  inverted) jump of the last item always targets backtracks. */
  if (jump != NULL)
    add_jump(compiler, compares > 0 ? list : backtracks, jump);
  }

if (found != NULL)
  set_jumps(found, LABEL());
}
8007 
8008 #undef SET_TYPE_OFFSET
8009 #undef SET_CHAR_OFFSET
8010 
8011 #endif
8012 
compile_simple_assertion_matchingpath(compiler_common * common,PCRE2_UCHAR type,PCRE2_SPTR cc,jump_list ** backtracks)8013 static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
8014 {
8015 DEFINE_COMPILER;
8016 int length;
8017 struct sljit_jump *jump[4];
8018 #ifdef SUPPORT_UNICODE
8019 struct sljit_label *label;
8020 #endif /* SUPPORT_UNICODE */
8021 
8022 switch(type)
8023   {
8024   case OP_SOD:
8025   if (HAS_VIRTUAL_REGISTERS)
8026     {
8027     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8028     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8029     }
8030   else
8031     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8032   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
8033   return cc;
8034 
8035   case OP_SOM:
8036   if (HAS_VIRTUAL_REGISTERS)
8037     {
8038     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8039     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
8040     }
8041   else
8042     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
8043   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
8044   return cc;
8045 
8046   case OP_NOT_WORD_BOUNDARY:
8047   case OP_WORD_BOUNDARY:
8048   add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
8049 #ifdef SUPPORT_UNICODE
8050   if (common->invalid_utf)
8051     {
8052     add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8053     return cc;
8054     }
8055 #endif /* SUPPORT_UNICODE */
8056   sljit_set_current_flags(compiler, SLJIT_SET_Z);
8057   add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8058   return cc;
8059 
8060   case OP_EODN:
8061   /* Requires rather complex checks. */
8062   jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
8063   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8064     {
8065     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8066     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8067     if (common->mode == PCRE2_JIT_COMPLETE)
8068       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
8069     else
8070       {
8071       jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
8072       OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
8073       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
8074       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
8075       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
8076       add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
8077       check_partial(common, TRUE);
8078       add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8079       JUMPHERE(jump[1]);
8080       }
8081     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8082     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8083     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8084     }
8085   else if (common->nltype == NLTYPE_FIXED)
8086     {
8087     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8088     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8089     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
8090     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
8091     }
8092   else
8093     {
8094     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8095     jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
8096     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8097     OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
8098     jump[2] = JUMP(SLJIT_GREATER);
8099     add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
8100     /* Equal. */
8101     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8102     jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
8103     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8104 
8105     JUMPHERE(jump[1]);
8106     if (common->nltype == NLTYPE_ANYCRLF)
8107       {
8108       OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8109       add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
8110       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
8111       }
8112     else
8113       {
8114       OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
8115       read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
8116       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
8117       add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
8118       sljit_set_current_flags(compiler, SLJIT_SET_Z);
8119       add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
8120       OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
8121       }
8122     JUMPHERE(jump[2]);
8123     JUMPHERE(jump[3]);
8124     }
8125   JUMPHERE(jump[0]);
8126   if (common->mode != PCRE2_JIT_COMPLETE)
8127     check_partial(common, TRUE);
8128   return cc;
8129 
8130   case OP_EOD:
8131   add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
8132   if (common->mode != PCRE2_JIT_COMPLETE)
8133     check_partial(common, TRUE);
8134   return cc;
8135 
8136   case OP_DOLL:
8137   if (HAS_VIRTUAL_REGISTERS)
8138     {
8139     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8140     OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8141     }
8142   else
8143     OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8144   add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
8145 
8146   if (!common->endonly)
8147     compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
8148   else
8149     {
8150     add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
8151     check_partial(common, FALSE);
8152     }
8153   return cc;
8154 
8155   case OP_DOLLM:
8156   jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
8157   if (HAS_VIRTUAL_REGISTERS)
8158     {
8159     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8160     OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8161     }
8162   else
8163     OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8164   add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
8165   check_partial(common, FALSE);
8166   jump[0] = JUMP(SLJIT_JUMP);
8167   JUMPHERE(jump[1]);
8168 
8169   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8170     {
8171     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8172     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8173     if (common->mode == PCRE2_JIT_COMPLETE)
8174       add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
8175     else
8176       {
8177       jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
8178       /* STR_PTR = STR_END - IN_UCHARS(1) */
8179       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8180       check_partial(common, TRUE);
8181       add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8182       JUMPHERE(jump[1]);
8183       }
8184 
8185     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8186     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8187     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8188     }
8189   else
8190     {
8191     peek_char(common, common->nlmax, TMP3, 0, NULL);
8192     check_newlinechar(common, common->nltype, backtracks, FALSE);
8193     }
8194   JUMPHERE(jump[0]);
8195   return cc;
8196 
8197   case OP_CIRC:
8198   if (HAS_VIRTUAL_REGISTERS)
8199     {
8200     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8201     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
8202     add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
8203     OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8204     add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
8205     }
8206   else
8207     {
8208     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8209     add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
8210     OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8211     add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
8212     }
8213   return cc;
8214 
8215   case OP_CIRCM:
8216   /* TMP2 might be used by peek_char_back. */
8217   if (HAS_VIRTUAL_REGISTERS)
8218     {
8219     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8220     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8221     jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
8222     OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8223     }
8224   else
8225     {
8226     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8227     jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
8228     OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8229     }
8230   add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
8231   jump[0] = JUMP(SLJIT_JUMP);
8232   JUMPHERE(jump[1]);
8233 
8234   if (!common->alt_circumflex)
8235     add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8236 
8237   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8238     {
8239     OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8240     add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
8241     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
8242     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
8243     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8244     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8245     }
8246   else
8247     {
8248     peek_char_back(common, common->nlmax, backtracks);
8249     check_newlinechar(common, common->nltype, backtracks, FALSE);
8250     }
8251   JUMPHERE(jump[0]);
8252   return cc;
8253 
8254   case OP_REVERSE:
8255   length = GET(cc, 0);
8256   if (length == 0)
8257     return cc + LINK_SIZE;
8258   if (HAS_VIRTUAL_REGISTERS)
8259     {
8260     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8261     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8262     }
8263   else
8264     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8265 #ifdef SUPPORT_UNICODE
8266   if (common->utf)
8267     {
8268     OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, length);
8269     label = LABEL();
8270     add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
8271     move_back(common, backtracks, FALSE);
8272     OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
8273     JUMPTO(SLJIT_NOT_ZERO, label);
8274     }
8275   else
8276 #endif
8277     {
8278     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
8279     add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
8280     }
8281   check_start_used_ptr(common);
8282   return cc + LINK_SIZE;
8283   }
8284 SLJIT_UNREACHABLE();
8285 return cc;
8286 }
8287 
8288 #ifdef SUPPORT_UNICODE
8289 
8290 #if PCRE2_CODE_UNIT_WIDTH != 32
8291 
do_extuni_utf(jit_arguments * args,PCRE2_SPTR cc)8292 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
8293 {
8294 PCRE2_SPTR start_subject = args->begin;
8295 PCRE2_SPTR end_subject = args->end;
8296 int lgb, rgb, ricount;
8297 PCRE2_SPTR prevcc, endcc, bptr;
8298 BOOL first = TRUE;
8299 uint32_t c;
8300 
8301 prevcc = cc;
8302 endcc = NULL;
8303 do
8304   {
8305   GETCHARINC(c, cc);
8306   rgb = UCD_GRAPHBREAK(c);
8307 
8308   if (first)
8309     {
8310     lgb = rgb;
8311     endcc = cc;
8312     first = FALSE;
8313     continue;
8314     }
8315 
8316   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8317     break;
8318 
8319   /* Not breaking between Regional Indicators is allowed only if there
8320   are an even number of preceding RIs. */
8321 
8322   if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8323     {
8324     ricount = 0;
8325     bptr = prevcc;
8326 
8327     /* bptr is pointing to the left-hand character */
8328     while (bptr > start_subject)
8329       {
8330       bptr--;
8331       BACKCHAR(bptr);
8332       GETCHAR(c, bptr);
8333 
8334       if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
8335         break;
8336 
8337       ricount++;
8338       }
8339 
8340     if ((ricount & 1) != 0) break;  /* Grapheme break required */
8341     }
8342 
8343   /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8344   allows any number of them before a following Extended_Pictographic. */
8345 
8346   if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8347        lgb != ucp_gbExtended_Pictographic)
8348     lgb = rgb;
8349 
8350   prevcc = endcc;
8351   endcc = cc;
8352   }
8353 while (cc < end_subject);
8354 
8355 return endcc;
8356 }
8357 
8358 #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
8359 
do_extuni_utf_invalid(jit_arguments * args,PCRE2_SPTR cc)8360 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
8361 {
8362 PCRE2_SPTR start_subject = args->begin;
8363 PCRE2_SPTR end_subject = args->end;
8364 int lgb, rgb, ricount;
8365 PCRE2_SPTR prevcc, endcc, bptr;
8366 BOOL first = TRUE;
8367 uint32_t c;
8368 
8369 prevcc = cc;
8370 endcc = NULL;
8371 do
8372   {
8373   GETCHARINC_INVALID(c, cc, end_subject, break);
8374   rgb = UCD_GRAPHBREAK(c);
8375 
8376   if (first)
8377     {
8378     lgb = rgb;
8379     endcc = cc;
8380     first = FALSE;
8381     continue;
8382     }
8383 
8384   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8385     break;
8386 
8387   /* Not breaking between Regional Indicators is allowed only if there
8388   are an even number of preceding RIs. */
8389 
8390   if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8391     {
8392     ricount = 0;
8393     bptr = prevcc;
8394 
8395     /* bptr is pointing to the left-hand character */
8396     while (bptr > start_subject)
8397       {
8398       GETCHARBACK_INVALID(c, bptr, start_subject, break);
8399 
8400       if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
8401         break;
8402 
8403       ricount++;
8404       }
8405 
8406     if ((ricount & 1) != 0)
8407       break;  /* Grapheme break required */
8408     }
8409 
8410   /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8411   allows any number of them before a following Extended_Pictographic. */
8412 
8413   if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8414        lgb != ucp_gbExtended_Pictographic)
8415     lgb = rgb;
8416 
8417   prevcc = endcc;
8418   endcc = cc;
8419   }
8420 while (cc < end_subject);
8421 
8422 return endcc;
8423 }
8424 
do_extuni_no_utf(jit_arguments * args,PCRE2_SPTR cc)8425 static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
8426 {
8427 PCRE2_SPTR start_subject = args->begin;
8428 PCRE2_SPTR end_subject = args->end;
8429 int lgb, rgb, ricount;
8430 PCRE2_SPTR bptr;
8431 uint32_t c;
8432 
8433 /* Patch by PH */
8434 /* GETCHARINC(c, cc); */
8435 c = *cc++;
8436 
8437 #if PCRE2_CODE_UNIT_WIDTH == 32
8438 if (c >= 0x110000)
8439   return NULL;
8440 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8441 lgb = UCD_GRAPHBREAK(c);
8442 
8443 while (cc < end_subject)
8444   {
8445   c = *cc;
8446 #if PCRE2_CODE_UNIT_WIDTH == 32
8447   if (c >= 0x110000)
8448     break;
8449 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8450   rgb = UCD_GRAPHBREAK(c);
8451 
8452   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8453     break;
8454 
8455   /* Not breaking between Regional Indicators is allowed only if there
8456   are an even number of preceding RIs. */
8457 
8458   if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8459     {
8460     ricount = 0;
8461     bptr = cc - 1;
8462 
8463     /* bptr is pointing to the left-hand character */
8464     while (bptr > start_subject)
8465       {
8466       bptr--;
8467       c = *bptr;
8468 #if PCRE2_CODE_UNIT_WIDTH == 32
8469       if (c >= 0x110000)
8470         break;
8471 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8472 
8473       if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
8474 
8475       ricount++;
8476       }
8477 
8478     if ((ricount & 1) != 0)
8479       break;  /* Grapheme break required */
8480     }
8481 
8482   /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8483   allows any number of them before a following Extended_Pictographic. */
8484 
8485   if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8486        lgb != ucp_gbExtended_Pictographic)
8487     lgb = rgb;
8488 
8489   cc++;
8490   }
8491 
8492 return cc;
8493 }
8494 
8495 #endif /* SUPPORT_UNICODE */
8496 
/* Emits the matching-path code for one opcode that matches a single
character (or a single code unit / grapheme cluster).

Arguments:
  common         the shared compiler state
  type           the opcode to compile; selects the case below
  cc             points at the data that follows the opcode (the literal for
                 OP_CHAR/OP_NOT, the property pair for OP_PROP, the bitmap
                 for OP_CLASS, and so on)
  backtracks     jump list that receives every "no match" jump
  check_str_ptr  when TRUE, detect_partial_match() (or, for OP_CHAR in
                 complete mode, an explicit STR_END comparison) is emitted
                 before the subject is read

Returns:         cc advanced past the opcode data that was consumed
*/

static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
{
DEFINE_COMPILER;
int length;
unsigned int c, oc, bit;
compare_context context;
struct sljit_jump *jump[3];
jump_list *end_list;
#ifdef SUPPORT_UNICODE
PCRE2_UCHAR propdata[5];
#endif /* SUPPORT_UNICODE */

switch(type)
  {
  /* \d and \D: fetch the character type and test the ctype_digit bit. */
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  /* Digits are usually 0-9, so it is worth optimizing them. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
    read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
  else
#endif
    read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
    /* Flip the starting bit in the negative case. */
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
  add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  /* \s and \S: same scheme as above with the ctype_space bit. */
  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
    read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
  else
#endif
    read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
  add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  /* \w and \W: same scheme as above with the ctype_word bit. */
  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
    read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
  else
#endif
    read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
  add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  /* Read one character and backtrack if it starts a newline. */
  case OP_ANY:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two-unit fixed newline: the character just read is compared with the
    high byte of common->newline, then the next unit (if any) with the low
    byte. Only when both match is the backtrack taken. */
    jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
    end_list = NULL;
    if (common->mode != PCRE2_JIT_PARTIAL_HARD)
      add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
    else
      check_str_end(common, &end_list);

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
    set_jumps(end_list, LABEL());
    JUMPHERE(jump[0]);
    }
  else
    check_newlinechar(common, common->nltype, backtracks, TRUE);
  return cc;

  /* Advance over one character without testing its value. */
  case OP_ALLANY:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
    if (common->invalid_utf)
      {
      read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
      return cc;
      }

#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if PCRE2_CODE_UNIT_WIDTH == 8
    /* Lead units >= 0xc0 index utf8_table4 to get the number of extra
    units to skip. */
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif PCRE2_CODE_UNIT_WIDTH == 16
    /* (unit & 0xfc00) == 0xd800 yields 1, which is shifted left by one and
    added to STR_PTR to skip one more unit. */
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
    JUMPHERE(jump[0]);
    return cc;
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
    }
#endif /* SUPPORT_UNICODE */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

  /* Advance over exactly one code unit. */
  case OP_ANYBYTE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

#ifdef SUPPORT_UNICODE
  /* \p and \P: wrap the two property units at cc in a small XCL_* item
  list and let compile_xclass_matchingpath() generate the test. */
  case OP_NOTPROP:
  case OP_PROP:
  propdata[0] = XCL_HASPROP;
  propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
  propdata[2] = cc[0];
  propdata[3] = cc[1];
  propdata[4] = XCL_END;
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, propdata, backtracks);
  return cc + 2;
#endif

  /* \R: a CR optionally followed by NL, otherwise whatever
  check_newlinechar() accepts for common->bsr_nltype. */
  case OP_ANYNL:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
  jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  /* We don't need to handle soft partial matching case. */
  end_list = NULL;
  if (common->mode != PCRE2_JIT_PARTIAL_HARD)
    add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  else
    check_str_end(common, &end_list);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  jump[2] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[0]);
  check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
  set_jumps(end_list, LABEL());
  JUMPHERE(jump[1]);
  JUMPHERE(jump[2]);
  return cc;

  /* \h and \H: the shared common->hspace routine is invoked with a fast
  call and its zero flag decides the outcome. */
  case OP_NOT_HSPACE:
  case OP_HSPACE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  if (type == OP_NOT_HSPACE)
    read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
  else
    read_char(common, 0x9, 0x3000, NULL, 0);

  add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  /* \v and \V: same scheme as above with the common->vspace routine. */
  case OP_NOT_VSPACE:
  case OP_VSPACE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  if (type == OP_NOT_VSPACE)
    read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
  else
    read_char(common, 0xa, 0x2029, NULL, 0);

  add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

#ifdef SUPPORT_UNICODE
  /* \X: call one of the do_extuni_* C helpers with (ARGUMENTS, STR_PTR) and
  use the returned pointer as the new STR_PTR. A zero return value is
  treated as "no match" in the configurations tested below. */
  case OP_EXTUNI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);

#if PCRE2_CODE_UNIT_WIDTH != 32
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
    common->utf ? (common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_utf)) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
  if (common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
#else
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
    common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
  if (!common->utf || common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
#endif

  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

  if (common->mode == PCRE2_JIT_PARTIAL_HARD)
    {
    jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
    /* Since we successfully read a char above, partial matching must occur. */
    check_partial(common, TRUE);
    JUMPHERE(jump[0]);
    }
  return cc;
#endif

  /* A literal character, with or without caseless matching. */
  case OP_CHAR:
  case OP_CHARI:
  length = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
#endif

  if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
    detect_partial_match(common, backtracks);

  /* Cases that byte_sequence_compare() can handle directly: a caseful
  literal, a character without an other case, or one for which
  char_get_othercase_bit() returns a non-zero value. */
  if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
    {
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

    context.length = IN_UCHARS(length);
    context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
    context.ucharptr = 0;
#endif
    return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
    }

  /* Remaining case: a caseless character that has an other case. The
  subject character is read and compared against both c and oc. */
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
#endif
    c = *cc;

  SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));

  if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  oc = char_othercase(common, c);
  read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);

  SLJIT_ASSERT(!is_powerof2(c ^ oc));

  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
    {
    /* With conditional move: replace TMP1 by c when it equals oc, then a
    single comparison against c is enough. */
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
    CMOV(SLJIT_EQUAL, TMP1, SLJIT_IMM, c);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    }
  else
    {
    jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
    JUMPHERE(jump[0]);
    }
  return cc + length;

  /* A negated literal: any one character except c (and, for OP_NOTI, its
  other case). */
  case OP_NOT:
  case OP_NOTI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  length = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
#if PCRE2_CODE_UNIT_WIDTH == 8
    c = *cc;
    if (c < 128 && !common->invalid_utf)
      {
      /* Shortcut for a literal below 128 in valid UTF-8: only the first
      subject unit is compared, then the whole character is skipped. */
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
      if (type == OP_NOT || !char_has_othercase(common, cc))
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      else
        {
        /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
        OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
        }
      /* Skip the variable-length character. */
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
      JUMPHERE(jump[0]);
      return cc + 1;
      }
    else
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
      {
      GETCHARLEN(c, cc, length);
      }
    }
  else
#endif /* SUPPORT_UNICODE */
    c = *cc;

  if (type == OP_NOT || !char_has_othercase(common, cc))
    {
    read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    }
  else
    {
    oc = char_othercase(common, c);
    read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
    bit = c ^ oc;
    if (is_powerof2(bit))
      {
      /* The two cases differ in a single bit: force that bit on and compare
      once. */
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
      }
    else
      {
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
      }
    }
  return cc + length;

  /* A character class given as a 32-byte bitmap at cc. */
  case OP_CLASS:
  case OP_NCLASS:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
  if (type == OP_NCLASS)
    read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
  else
    read_char(common, 0, bit, NULL, 0);
#else
  if (type == OP_NCLASS)
    read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
  else
    read_char(common, 0, 255, NULL, 0);
#endif

  /* If optimize_class() reports success, nothing more is emitted here. */
  if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
    return cc + 32 / sizeof(PCRE2_UCHAR);

  /* Characters above the bitmap range: OP_CLASS backtracks, OP_NCLASS
  jumps past the bitmap test (jump[0] is resolved at the end). */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  jump[0] = NULL;
  if (common->utf)
    {
    jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
    if (type == OP_CLASS)
      {
      add_jump(compiler, backtracks, jump[0]);
      jump[0] = NULL;
      }
    }
#elif PCRE2_CODE_UNIT_WIDTH != 8
  jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  if (type == OP_CLASS)
    {
    add_jump(compiler, backtracks, jump[0]);
    jump[0] = NULL;
    }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

  /* Bitmap lookup: TMP1 = bitmap byte at index (c >> 3), TMP2 = 1 << (c & 7);
  backtrack when the bit is clear. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  if (jump[0] != NULL)
    JUMPHERE(jump[0]);
#endif
  return cc + 32 / sizeof(PCRE2_UCHAR);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* An extended class: the item list starts LINK_SIZE units after cc, and
  the value stored at cc gives the distance used to compute the return. */
  case OP_XCLASS:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
  return cc + GET(cc, 0) - 1;
#endif
  }
SLJIT_UNREACHABLE();
return cc;
}
8901 
compile_charn_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,jump_list ** backtracks)8902 static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
8903 {
8904 /* This function consumes at least one input character. */
8905 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
8906 DEFINE_COMPILER;
8907 PCRE2_SPTR ccbegin = cc;
8908 compare_context context;
8909 int size;
8910 
8911 context.length = 0;
8912 do
8913   {
8914   if (cc >= ccend)
8915     break;
8916 
8917   if (*cc == OP_CHAR)
8918     {
8919     size = 1;
8920 #ifdef SUPPORT_UNICODE
8921     if (common->utf && HAS_EXTRALEN(cc[1]))
8922       size += GET_EXTRALEN(cc[1]);
8923 #endif
8924     }
8925   else if (*cc == OP_CHARI)
8926     {
8927     size = 1;
8928 #ifdef SUPPORT_UNICODE
8929     if (common->utf)
8930       {
8931       if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
8932         size = 0;
8933       else if (HAS_EXTRALEN(cc[1]))
8934         size += GET_EXTRALEN(cc[1]);
8935       }
8936     else
8937 #endif
8938     if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
8939       size = 0;
8940     }
8941   else
8942     size = 0;
8943 
8944   cc += 1 + size;
8945   context.length += IN_UCHARS(size);
8946   }
8947 while (size > 0 && context.length <= 128);
8948 
8949 cc = ccbegin;
8950 if (context.length > 0)
8951   {
8952   /* We have a fixed-length byte sequence. */
8953   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
8954   add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
8955 
8956   context.sourcereg = -1;
8957 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
8958   context.ucharptr = 0;
8959 #endif
8960   do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
8961   return cc;
8962   }
8963 
8964 /* A non-fixed length character will be checked if length == 0. */
8965 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
8966 }
8967 
/* Forward declarations, so that the functions which follow can refer to
these two before their definitions appear. */
static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
8971 
/* Allocates a backtrack record of "size" bytes with sljit_alloc_memory(),
zero-fills it, stores "ccstart" in its cc field and links it in as the new
parent->top (the previous top is kept in its prev field). If the compiler
reports an error after the allocation, the enclosing function returns
"error" before the record is touched. The macro relies on variables named
compiler, backtrack and parent being in scope at the point of use. */

#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, size); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Same as PUSH_BACKTRACK, for use in functions returning void: on a
compiler error the enclosing function returns without a value. */

#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, size); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Views the in-scope "backtrack" pointer as a pointer to "type". */

#define BACKTRACK_AS(type) ((type *)backtrack)
8999 
/* Emits code that walks the name-table entries of a duplicate-name
reference (OP_DNREF / OP_DNREFI) and leaves the address of the selected
OVECTOR slot in TMP2. For every entry except the last, the search stops as
soon as the slot differs from OVECTOR(1). For the last entry, a jump is added
to "backtracks" when the slot equals OVECTOR(1), provided that backtracks is
not NULL and common->unset_backref is not set. */

static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int remaining = GET2(cc, 1 + IMM2_SIZE);
PCRE2_SPTR entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
unsigned int ovec_index;
jump_list *located = NULL;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

/* TMP1 holds the OVECTOR(1) value that every candidate is compared with. */

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* All entries but the last one. */

for (remaining--; remaining > 0; remaining--)
  {
  ovec_index = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovec_index));
  add_jump(compiler, &located, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index), TMP1, 0));
  entry += common->name_entry_size;
  }

/* The last entry is selected unconditionally, apart from the optional
backtrack described above. */

ovec_index = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovec_index));
if (backtracks != NULL && !common->unset_backref)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index), TMP1, 0));

set_jumps(located, LABEL());
}
9029 
/* Emits the code that matches one occurrence of a back reference at STR_PTR.

   common      compiler state (sljit compiler, matching mode, utf flag and the
               jump lists of the shared compare helpers).
   cc          points at the reference opcode. For OP_REF / OP_REFI the group
               number is read with GET2(cc, 1) and the start / end pointers
               come from OVECTOR(). For any other opcode the start / end pair
               is read through TMP2, so the caller must have loaded TMP2
               before the emitted code runs.
   backtracks  jump list that receives every "no match" jump emitted here.
   withchecks  when TRUE, also emit the "group is unset" test for OP_REF /
               OP_REFI (skipped if common->unset_backref is set) and the
               empty-reference test.
   emptyfail   only has an effect together with withchecks: TRUE sends an
               empty reference to backtracks, FALSE lets it pass.

   Nothing is returned. */
compile_ref_matchingpath(compiler_common * common,PCRE2_SPTR cc,jump_list ** backtracks,BOOL withchecks,BOOL emptyfail)9030 static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
9031 {
9032 DEFINE_COMPILER;
9033 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9034 int offset = 0;
9035 struct sljit_jump *jump = NULL;
9036 struct sljit_jump *partial;
9037 struct sljit_jump *nopartial;
9038 #if defined SUPPORT_UNICODE
9039 struct sljit_label *loop;
9040 struct sljit_label *caseless_loop;
9041 jump_list *no_match = NULL;
9042 int source_reg = COUNT_MATCH;
9043 int source_end_reg = ARGUMENTS;
9044 int char1_reg = STACK_LIMIT;
9045 #endif /* SUPPORT_UNICODE */
9046 
/* TMP1 = start pointer of the referenced text. */
9047 if (ref)
9048   {
9049   offset = GET2(cc, 1) << 1;
9050   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9051   /* OVECTOR(1) contains the "string begin - 1" constant. */
9052   if (withchecks && !common->unset_backref)
9053     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9054   }
9055 else
9056   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9057 
9058 #if defined SUPPORT_UNICODE
/* UTF caseless references are compared character by character. Because the
   test below requires *cc == OP_REFI, "ref" is always TRUE inside this branch,
   so the else arm of the following "if (ref)" cannot be reached.
   NOTE(review): any other caseless reference opcode therefore uses the
   caselesscmp helper in the generic branch, even in UTF mode - confirm that
   this is intended. */
9059 if (common->utf && *cc == OP_REFI)
9060   {
9061   SLJIT_ASSERT(common->iref_ptr != 0);
9062 
  /* TMP2 = end pointer of the referenced text. */
9063   if (ref)
9064     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9065   else
9066     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9067 
9068   if (withchecks && emptyfail)
9069     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));
9070 
  /* Three registers are borrowed as loop temporaries. Their current values
     are parked in the three words at common->iref_ptr and are reloaded on
     every exit path below. */
9071   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0);
9072   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0);
9073   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0);
9074 
9075   OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
9076   OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);
9077 
  /* Per character loop: "jump" leaves with success once the whole referenced
     text is consumed, "partial" leaves when the subject runs out first. */
9078   loop = LABEL();
9079   jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
9080   partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
9081 
9082   /* Read original character. It must be a valid UTF character. */
  /* STR_PTR is swapped with source_reg around the read; TMP3 keeps the
     subject position meanwhile. */
9083   OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
9084   OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);
9085 
9086   read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);
9087 
9088   OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
9089   OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
9090   OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);
9091 
9092   /* Read second character. */
9093   read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);
9094 
  /* Identical characters: continue with the next one. */
9095   CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
9096 
  /* Otherwise consult the Unicode record of the subject character; TMP3
     keeps the character itself across the getucd call. */
9097   OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
9098 
9099   add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
9100 
  /* TMP2 = TMP2 * 12 (4x + 8x), then made an address inside ucd_records.
     NOTE(review): presumably getucd leaves the record index in TMP2 and 12
     is sizeof(ucd_record) - confirm. */
9101   OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
9102   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
9103   OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
9104 
9105   OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));
9106 
  /* Match if (subject character + other_case) equals the reference
     character. TMP2 is replaced by the record's caseset value. */
9107   OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
9108   OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
9109   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
9110   CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
9111 
  /* caseset == 0 is a failure; otherwise TMP2 becomes the address of entry
     number "caseset" in the 32 bit ucd_caseless_sets table. */
9112   add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9113   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
9114   OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));
9115 
  /* Walk the set: an entry equal to the reference character is a match, a
     smaller entry continues the scan, a larger one falls through to no_match. */
9116   caseless_loop = LABEL();
9117   OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9118   OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
9119   OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, char1_reg, 0);
9120   JUMPTO(SLJIT_EQUAL, loop);
9121   JUMPTO(SLJIT_LESS, caseless_loop);
9122 
  /* Failure exit: restore the borrowed registers and backtrack. In complete
     mode running out of subject ends up here as well. */
9123   set_jumps(no_match, LABEL());
9124   if (common->mode == PCRE2_JIT_COMPLETE)
9125     JUMPHERE(partial);
9126 
9127   OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9128   OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9129   OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9130   add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9131 
  /* Partial matching modes: running out of subject restores the registers,
     goes through check_partial() and then backtracks. */
9132   if (common->mode != PCRE2_JIT_COMPLETE)
9133     {
9134     JUMPHERE(partial);
9135     OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9136     OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9137     OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9138 
9139     check_partial(common, FALSE);
9140     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9141     }
9142 
  /* Success exit: restore the borrowed registers. */
9143   JUMPHERE(jump);
9144   OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9145   OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9146   OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9147   return;
9148   }
9149 else
9150 #endif /* SUPPORT_UNICODE */
9151   {
  /* TMP2 = length of the referenced text (end - start); the zero flag is set
     when the reference is empty. */
9152   if (ref)
9153     OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
9154   else
9155     OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
9156 
9157   if (withchecks)
9158     jump = JUMP(SLJIT_ZERO);
9159 
  /* Advance STR_PTR over the whole candidate first; if that runs past STR_END
     the subject is too short ("partial"). */
9160   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
9161   partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
9162   if (common->mode == PCRE2_JIT_COMPLETE)
9163     add_jump(compiler, backtracks, partial);
9164 
  /* Call the shared compare helper (caseful for OP_REF, caseless otherwise).
     A non-zero TMP2 afterwards is treated as a mismatch. */
9165   add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
9166   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9167 
9168   if (common->mode != PCRE2_JIT_COMPLETE)
9169     {
9170     nopartial = JUMP(SLJIT_JUMP);
9171     JUMPHERE(partial);
9172     /* TMP2 -= STR_PTR - STR_END: clamp the length to what is left in the subject. */
9173     OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
9174     OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    /* Nothing left to compare: skip the helper call. */
9175     partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
9176     OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
9177     add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
9178     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9179     JUMPHERE(partial);
9180     check_partial(common, FALSE);
9181     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9182     JUMPHERE(nopartial);
9183     }
9184   }
9185 
/* Resolve the empty-reference jump taken above according to emptyfail. */
9186 if (jump != NULL)
9187   {
9188   if (emptyfail)
9189     add_jump(compiler, backtracks, jump);
9190   else
9191     JUMPHERE(jump);
9192   }
9193 }
9194 
/* Emits the matching path of a back reference that is followed by a repeat
   operator (OP_CRSTAR ... OP_CRMINRANGE).

   common  compiler state.
   cc      points at the reference opcode.
   parent  parent backtrack node, used by the PUSH_BACKTRACK macro.

   Returns the address of the first opcode after the repeat operator. NULL is
   the error value handed to PUSH_BACKTRACK.

   min / max are the repeat limits; max == 0 is used as "no upper limit". */
compile_ref_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9195 static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9196 {
9197 DEFINE_COMPILER;
9198 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9199 backtrack_common *backtrack;
9200 PCRE2_UCHAR type;
9201 int offset = 0;
9202 struct sljit_label *label;
9203 struct sljit_jump *zerolength;
9204 struct sljit_jump *jump = NULL;
9205 PCRE2_SPTR ccbegin = cc;
9206 int min = 0, max = 0;
9207 BOOL minimize;
9208 
9209 PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
9210 
/* For opcodes other than OP_REF / OP_REFI cc is advanced by IMM2_SIZE, so the
   repeat operator is found at the same relative offset for both forms. */
9211 if (ref)
9212   offset = GET2(cc, 1) << 1;
9213 else
9214   cc += IMM2_SIZE;
9215 type = cc[1 + IMM2_SIZE];
9216 
/* The minimizing variants are the odd opcodes, which the compile-time assert
   on OP_CRSTAR guarantees. */
9217 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
9218 minimize = (type & 0x1) != 0;
9219 switch(type)
9220   {
9221   case OP_CRSTAR:
9222   case OP_CRMINSTAR:
9223   min = 0;
9224   max = 0;
9225   cc += 1 + IMM2_SIZE + 1;
9226   break;
9227   case OP_CRPLUS:
9228   case OP_CRMINPLUS:
9229   min = 1;
9230   max = 0;
9231   cc += 1 + IMM2_SIZE + 1;
9232   break;
9233   case OP_CRQUERY:
9234   case OP_CRMINQUERY:
9235   min = 0;
9236   max = 1;
9237   cc += 1 + IMM2_SIZE + 1;
9238   break;
9239   case OP_CRRANGE:
9240   case OP_CRMINRANGE:
9241   min = GET2(cc, 1 + IMM2_SIZE + 1);
9242   max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
9243   cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
9244   break;
9245   default:
9246   SLJIT_UNREACHABLE();
9247   break;
9248   }
9249 
/* Greedy repeat. */
9250 if (!minimize)
9251   {
9252   if (min == 0)
9253     {
    /* Two stack slots: STACK(0) = current STR_PTR, STACK(1) = 0. */
9254     allocate_stack(common, 2);
9255     if (ref)
9256       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9257     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9258     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9259     /* Temporary release of STR_PTR. */
9260     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9261     /* Handles both invalid and empty cases. Since the minimum repeat,
9262     is zero the invalid case is basically the same as an empty case. */
9263     if (ref)
9264       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9265     else
9266       {
      /* TMP2, as left by compile_dnref_search, is kept in POSSESSIVE1 so that
         every iteration can reload it. */
9267       compile_dnref_search(common, ccbegin, NULL);
9268       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9269       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9270       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9271       }
9272     /* Restore if not zero length. */
9273     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9274     }
9275   else
9276     {
    /* At least one occurrence is required: an unset group is a failure, and
       only a single stack slot holding 0 is pushed. */
9277     allocate_stack(common, 1);
9278     if (ref)
9279       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9280     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9281     if (ref)
9282       {
9283       add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9284       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9285       }
9286     else
9287       {
9288       compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
9289       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9290       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9291       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9292       }
9293     }
9294 
  /* POSSESSIVE0 serves as the iteration counter whenever a limit above one
     has to be checked. */
9295   if (min > 1 || max > 1)
9296     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
9297 
  /* Start of one iteration; the reference is matched without the unset and
     empty checks, which were done above. */
9298   label = LABEL();
9299   if (!ref)
9300     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
9301   compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
9302 
9303   if (min > 1 || max > 1)
9304     {
9305     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
9306     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9307     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
    /* Below the minimum: iterate again without recording a position. */
9308     if (min > 1)
9309       CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
9310     if (max > 1)
9311       {
      /* Stop at the maximum; otherwise push the current position and try
         one more occurrence. */
9312       jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
9313       allocate_stack(common, 1);
9314       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9315       JUMPTO(SLJIT_JUMP, label);
9316       JUMPHERE(jump);
9317       }
9318     }
9319 
9320   if (max == 0)
9321     {
9322     /* Includes min > 1 case as well. */
9323     allocate_stack(common, 1);
9324     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9325     JUMPTO(SLJIT_JUMP, label);
9326     }
9327 
9328   JUMPHERE(zerolength);
9329   BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9330 
9331   count_match(common);
9332   return cc;
9333   }
9334 
/* Minimizing repeat. STACK(0) holds a subject position (0 until one is
   recorded), STACK(1) the iteration counter (not initialized for
   OP_CRMINSTAR), and for opcodes other than OP_REF / OP_REFI STACK(2) holds
   TMP2 as left by compile_dnref_search. */
9335 allocate_stack(common, ref ? 2 : 3);
9336 if (ref)
9337   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9338 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9339 if (type != OP_CRMINSTAR)
9340   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9341 
9342 if (min == 0)
9343   {
9344   /* Handles both invalid and empty cases. Since the minimum repeat,
9345   is zero the invalid case is basically the same as an empty case. */
9346   if (ref)
9347     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9348   else
9349     {
9350     compile_dnref_search(common, ccbegin, NULL);
9351     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9352     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9353     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9354     }
9355   /* Length is non-zero, we can match real repeats. */
9356   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  /* With min == 0 the first pass skips the match code below entirely. */
9357   jump = JUMP(SLJIT_JUMP);
9358   }
9359 else
9360   {
9361   if (ref)
9362     {
9363     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9364     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9365     }
9366   else
9367     {
9368     compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
9369     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9370     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9371     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9372     }
9373   }
9374 
/* Label for matching one more occurrence. It is recorded in the backtrack
   node and is also the target of the "min" loop below. The attempt fails
   once the counter in STACK(1) has reached max. */
9375 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9376 if (max > 0)
9377   add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
9378 
9379 if (!ref)
9380   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9381 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
9382 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9383 
/* Count the occurrence; while below min, loop straight back. */
9384 if (min > 1)
9385   {
9386   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9387   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9388   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9389   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
9390   }
9391 else if (max > 0)
9392   OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
9393 
9394 if (jump != NULL)
9395   JUMPHERE(jump);
9396 JUMPHERE(zerolength);
9397 
9398 count_match(common);
9399 return cc;
9400 }
9401 
compile_recurse_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9402 static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9403 {
9404 DEFINE_COMPILER;
9405 backtrack_common *backtrack;
9406 recurse_entry *entry = common->entries;
9407 recurse_entry *prev = NULL;
9408 sljit_sw start = GET(cc, 1);
9409 PCRE2_SPTR start_cc;
9410 BOOL needs_control_head;
9411 
9412 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
9413 
9414 /* Inlining simple patterns. */
9415 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
9416   {
9417   start_cc = common->start + start;
9418   compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
9419   BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
9420   return cc + 1 + LINK_SIZE;
9421   }
9422 
9423 while (entry != NULL)
9424   {
9425   if (entry->start == start)
9426     break;
9427   prev = entry;
9428   entry = entry->next;
9429   }
9430 
9431 if (entry == NULL)
9432   {
9433   entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
9434   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9435     return NULL;
9436   entry->next = NULL;
9437   entry->entry_label = NULL;
9438   entry->backtrack_label = NULL;
9439   entry->entry_calls = NULL;
9440   entry->backtrack_calls = NULL;
9441   entry->start = start;
9442 
9443   if (prev != NULL)
9444     prev->next = entry;
9445   else
9446     common->entries = entry;
9447   }
9448 
9449 BACKTRACK_AS(recurse_backtrack)->entry = entry;
9450 
9451 if (entry->entry_label == NULL)
9452   add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));
9453 else
9454   JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
9455 /* Leave if the match is failed. */
9456 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
9457 BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
9458 return cc + 1 + LINK_SIZE;
9459 }
9460 
do_callout(struct jit_arguments * arguments,pcre2_callout_block * callout_block,PCRE2_SPTR * jit_ovector)9461 static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
9462 {
9463 PCRE2_SPTR begin;
9464 PCRE2_SIZE *ovector;
9465 sljit_u32 oveccount, capture_top;
9466 
9467 if (arguments->callout == NULL)
9468   return 0;
9469 
9470 SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);
9471 
9472 begin = arguments->begin;
9473 ovector = (PCRE2_SIZE*)(callout_block + 1);
9474 oveccount = callout_block->capture_top;
9475 
9476 SLJIT_ASSERT(oveccount >= 1);
9477 
9478 callout_block->version = 2;
9479 callout_block->callout_flags = 0;
9480 
9481 /* Offsets in subject. */
9482 callout_block->subject_length = arguments->end - arguments->begin;
9483 callout_block->start_match = jit_ovector[0] - begin;
9484 callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
9485 callout_block->subject = begin;
9486 
9487 /* Convert and copy the JIT offset vector to the ovector array. */
9488 callout_block->capture_top = 1;
9489 callout_block->offset_vector = ovector;
9490 
9491 ovector[0] = PCRE2_UNSET;
9492 ovector[1] = PCRE2_UNSET;
9493 ovector += 2;
9494 jit_ovector += 2;
9495 capture_top = 1;
9496 
9497 /* Convert pointers to sizes. */
9498 while (--oveccount != 0)
9499   {
9500   capture_top++;
9501 
9502   ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin);
9503   ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin);
9504 
9505   if (ovector[0] != PCRE2_UNSET)
9506     callout_block->capture_top = capture_top;
9507 
9508   ovector += 2;
9509   jit_ovector += 2;
9510   }
9511 
9512 return (arguments->callout)(callout_block, arguments->callout_data);
9513 }
9514 
/* Byte offset of a pcre2_callout_block member; used by the stores that fill
   in the callout block on the stack. */
9515 #define CALLOUT_ARG_OFFSET(arg) \
9516     SLJIT_OFFSETOF(pcre2_callout_block, arg)
9517 
/* Emits the code for a callout: OP_CALLOUT, or a callout that carries a
   string (any other opcode handed to this function).

   The emitted code allocates stack space for a pcre2_callout_block followed
   by one start / end pair per group, fills in the block, calls do_callout()
   and acts on its return value: greater than zero backtracks, any other
   non-zero value jumps to the abort path, zero continues matching.

   common  compiler state.
   cc      points at the callout opcode.
   parent  parent backtrack node, used by the PUSH_BACKTRACK macro.

   Returns the address of the opcode following the callout. NULL is the error
   value handed to PUSH_BACKTRACK. */
compile_callout_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9518 static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9519 {
9520 DEFINE_COMPILER;
9521 backtrack_common *backtrack;
9522 sljit_s32 mov_opcode;
9523 unsigned int callout_length = (*cc == OP_CALLOUT)
9524     ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
9525 sljit_sw value1;
9526 sljit_sw value2;
9527 sljit_sw value3;
/* Bytes needed for (top_bracket + 1) pairs of sljit_sw values. */
9528 sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw);
9529 
9530 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9531 
/* Total size in sljit_sw units, rounded up: the callout block plus the
   pairs that follow it. */
9532 callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);
9533 
9534 allocate_stack(common, callout_arg_size);
9535 
9536 SLJIT_ASSERT(common->capture_last_ptr != 0);
9537 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
9538 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
/* The callout number is the value at cc[1 + 2 * LINK_SIZE] for OP_CALLOUT
   and 0 otherwise. */
9539 value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
9540 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
9541 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
9542 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
9543 
9544 /* These pointer sized fields temporarily store internal variables. */
/* STR_PTR is parked in offset_vector; do_callout() reads it from there to
   compute current_position before installing the real vector. */
9545 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
9546 
/* TMP2 = mark pointer taken from the jit_arguments structure addressed by
   TMP1; only loaded when common->mark_ptr is in use. */
9547 if (common->mark_ptr != 0)
9548   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
/* PCRE2_SIZE members are stored with a 32 bit move when PCRE2_SIZE is four
   bytes wide and with a full word move otherwise. */
9549 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
9550 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
9551 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
9552 
/* value1 / value2 / value3 become callout_string, callout_string_length and
   callout_string_offset; all three are zero for OP_CALLOUT. */
9553 if (*cc == OP_CALLOUT)
9554   {
9555   value1 = 0;
9556   value2 = 0;
9557   value3 = 0;
9558   }
9559 else
9560   {
9561   value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
9562   value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
9563   value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
9564   }
9565 
9566 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
9567 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
9568 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
/* mark = TMP2 when marks are in use, otherwise the immediate 0. */
9569 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
9570 
9571 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
9572 
9573 /* Needed to save important temporary registers. */
/* STR_PTR shares SLJIT_R1 with the second call argument (see the assert
   above), so it is kept in LOCALS0 across the call. */
9574 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
9575 /* SLJIT_R0 = arguments */
/* R1 = address of the callout block, R2 = address of the offset vector. */
9576 OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
9577 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
9578 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
9579 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
9580 free_stack(common, callout_arg_size);
9581 
9582 /* Check return value. */
/* Greater than zero: backtrack. Any other non-zero value (negative, hence
   the SIG_LESS remarks): abort. Zero falls through. */
9583 OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
9584 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER32));
9585 if (common->abort_label == NULL)
9586   add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL32) /* SIG_LESS */);
9587 else
9588   JUMPTO(SLJIT_NOT_EQUAL32 /* SIG_LESS */, common->abort_label);
9589 return cc + callout_length;
9590 }
9591 
/* NOTE(review): no #define of CALLOUT_ARG_SIZE is visible next to this
   function; the first #undef may be a leftover - confirm before removing. */
9592 #undef CALLOUT_ARG_SIZE
9593 #undef CALLOUT_ARG_OFFSET
9594 
assert_needs_str_ptr_saving(PCRE2_SPTR cc)9595 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
9596 {
9597 while (TRUE)
9598   {
9599   switch (*cc)
9600     {
9601     case OP_CALLOUT_STR:
9602     cc += GET(cc, 1 + 2*LINK_SIZE);
9603     break;
9604 
9605     case OP_NOT_WORD_BOUNDARY:
9606     case OP_WORD_BOUNDARY:
9607     case OP_CIRC:
9608     case OP_CIRCM:
9609     case OP_DOLL:
9610     case OP_DOLLM:
9611     case OP_CALLOUT:
9612     case OP_ALT:
9613     cc += PRIV(OP_lengths)[*cc];
9614     break;
9615 
9616     case OP_KET:
9617     return FALSE;
9618 
9619     default:
9620     return TRUE;
9621     }
9622   }
9623 }
9624 
compile_assert_matchingpath(compiler_common * common,PCRE2_SPTR cc,assert_backtrack * backtrack,BOOL conditional)9625 static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
9626 {
9627 DEFINE_COMPILER;
9628 int framesize;
9629 int extrasize;
9630 BOOL local_quit_available = FALSE;
9631 BOOL needs_control_head;
9632 int private_data_ptr;
9633 backtrack_common altbacktrack;
9634 PCRE2_SPTR ccbegin;
9635 PCRE2_UCHAR opcode;
9636 PCRE2_UCHAR bra = OP_BRA;
9637 jump_list *tmp = NULL;
9638 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
9639 jump_list **found;
9640 /* Saving previous accept variables. */
9641 BOOL save_local_quit_available = common->local_quit_available;
9642 BOOL save_in_positive_assertion = common->in_positive_assertion;
9643 then_trap_backtrack *save_then_trap = common->then_trap;
9644 struct sljit_label *save_quit_label = common->quit_label;
9645 struct sljit_label *save_accept_label = common->accept_label;
9646 jump_list *save_quit = common->quit;
9647 jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
9648 jump_list *save_accept = common->accept;
9649 struct sljit_jump *jump;
9650 struct sljit_jump *brajump = NULL;
9651 
9652 /* Assert captures then. */
9653 common->then_trap = NULL;
9654 
9655 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
9656   {
9657   SLJIT_ASSERT(!conditional);
9658   bra = *cc;
9659   cc++;
9660   }
9661 private_data_ptr = PRIVATE_DATA(cc);
9662 SLJIT_ASSERT(private_data_ptr != 0);
9663 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
9664 backtrack->framesize = framesize;
9665 backtrack->private_data_ptr = private_data_ptr;
9666 opcode = *cc;
9667 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
9668 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
9669 ccbegin = cc;
9670 cc += GET(cc, 1);
9671 
9672 if (bra == OP_BRAMINZERO)
9673   {
9674   /* This is a braminzero backtrack path. */
9675   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9676   free_stack(common, 1);
9677   brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9678   }
9679 
9680 if (framesize < 0)
9681   {
9682   extrasize = 1;
9683   if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
9684     extrasize = 0;
9685 
9686   if (needs_control_head)
9687     extrasize++;
9688 
9689   if (framesize == no_frame)
9690     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
9691 
9692   if (extrasize > 0)
9693     allocate_stack(common, extrasize);
9694 
9695   if (needs_control_head)
9696     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9697 
9698   if (extrasize > 0)
9699     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9700 
9701   if (needs_control_head)
9702     {
9703     SLJIT_ASSERT(extrasize == 2);
9704     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9705     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9706     }
9707   }
9708 else
9709   {
9710   extrasize = needs_control_head ? 3 : 2;
9711   allocate_stack(common, framesize + extrasize);
9712 
9713   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9714   OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
9715   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
9716   if (needs_control_head)
9717     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9718   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9719 
9720   if (needs_control_head)
9721     {
9722     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
9723     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
9724     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9725     }
9726   else
9727     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9728 
9729   init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
9730   }
9731 
9732 memset(&altbacktrack, 0, sizeof(backtrack_common));
9733 if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
9734   {
9735   /* Control verbs cannot escape from these asserts. */
9736   local_quit_available = TRUE;
9737   common->local_quit_available = TRUE;
9738   common->quit_label = NULL;
9739   common->quit = NULL;
9740   }
9741 
9742 common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
9743 common->positive_assertion_quit = NULL;
9744 
9745 while (1)
9746   {
9747   common->accept_label = NULL;
9748   common->accept = NULL;
9749   altbacktrack.top = NULL;
9750   altbacktrack.topbacktracks = NULL;
9751 
9752   if (*ccbegin == OP_ALT && extrasize > 0)
9753     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9754 
9755   altbacktrack.cc = ccbegin;
9756   compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
9757   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9758     {
9759     if (local_quit_available)
9760       {
9761       common->local_quit_available = save_local_quit_available;
9762       common->quit_label = save_quit_label;
9763       common->quit = save_quit;
9764       }
9765     common->in_positive_assertion = save_in_positive_assertion;
9766     common->then_trap = save_then_trap;
9767     common->accept_label = save_accept_label;
9768     common->positive_assertion_quit = save_positive_assertion_quit;
9769     common->accept = save_accept;
9770     return NULL;
9771     }
9772   common->accept_label = LABEL();
9773   if (common->accept != NULL)
9774     set_jumps(common->accept, common->accept_label);
9775 
9776   /* Reset stack. */
9777   if (framesize < 0)
9778     {
9779     if (framesize == no_frame)
9780       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9781     else if (extrasize > 0)
9782       free_stack(common, extrasize);
9783 
9784     if (needs_control_head)
9785       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
9786     }
9787   else
9788     {
9789     if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
9790       {
9791       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9792       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
9793       if (needs_control_head)
9794         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
9795       }
9796     else
9797       {
9798       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9799       if (needs_control_head)
9800         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
9801       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9802       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
9803       }
9804     }
9805 
9806   if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
9807     {
9808     /* We know that STR_PTR was stored on the top of the stack. */
9809     if (conditional)
9810       {
9811       if (extrasize > 0)
9812         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
9813       }
9814     else if (bra == OP_BRAZERO)
9815       {
9816       if (framesize < 0)
9817         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
9818       else
9819         {
9820         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
9821         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
9822         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9823         }
9824       OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9825       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9826       }
9827     else if (framesize >= 0)
9828       {
9829       /* For OP_BRA and OP_BRAMINZERO. */
9830       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
9831       }
9832     }
9833   add_jump(compiler, found, JUMP(SLJIT_JUMP));
9834 
9835   compile_backtrackingpath(common, altbacktrack.top);
9836   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9837     {
9838     if (local_quit_available)
9839       {
9840       common->local_quit_available = save_local_quit_available;
9841       common->quit_label = save_quit_label;
9842       common->quit = save_quit;
9843       }
9844     common->in_positive_assertion = save_in_positive_assertion;
9845     common->then_trap = save_then_trap;
9846     common->accept_label = save_accept_label;
9847     common->positive_assertion_quit = save_positive_assertion_quit;
9848     common->accept = save_accept;
9849     return NULL;
9850     }
9851   set_jumps(altbacktrack.topbacktracks, LABEL());
9852 
9853   if (*cc != OP_ALT)
9854     break;
9855 
9856   ccbegin = cc;
9857   cc += GET(cc, 1);
9858   }
9859 
9860 if (local_quit_available)
9861   {
9862   SLJIT_ASSERT(common->positive_assertion_quit == NULL);
9863   /* Makes the check less complicated below. */
9864   common->positive_assertion_quit = common->quit;
9865   }
9866 
9867 /* None of them matched. */
9868 if (common->positive_assertion_quit != NULL)
9869   {
9870   jump = JUMP(SLJIT_JUMP);
9871   set_jumps(common->positive_assertion_quit, LABEL());
9872   SLJIT_ASSERT(framesize != no_stack);
9873   if (framesize < 0)
9874     OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
9875   else
9876     {
9877     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9878     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9879     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
9880     }
9881   JUMPHERE(jump);
9882   }
9883 
9884 if (needs_control_head)
9885   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
9886 
9887 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
9888   {
9889   /* Assert is failed. */
9890   if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
9891     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9892 
9893   if (framesize < 0)
9894     {
9895     /* The topmost item should be 0. */
9896     if (bra == OP_BRAZERO)
9897       {
9898       if (extrasize == 2)
9899         free_stack(common, 1);
9900       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9901       }
9902     else if (extrasize > 0)
9903       free_stack(common, extrasize);
9904     }
9905   else
9906     {
9907     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
9908     /* The topmost item should be 0. */
9909     if (bra == OP_BRAZERO)
9910       {
9911       free_stack(common, framesize + extrasize - 1);
9912       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9913       }
9914     else
9915       free_stack(common, framesize + extrasize);
9916     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9917     }
9918   jump = JUMP(SLJIT_JUMP);
9919   if (bra != OP_BRAZERO)
9920     add_jump(compiler, target, jump);
9921 
9922   /* Assert is successful. */
9923   set_jumps(tmp, LABEL());
9924   if (framesize < 0)
9925     {
9926     /* We know that STR_PTR was stored on the top of the stack. */
9927     if (extrasize > 0)
9928       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
9929 
9930     /* Keep the STR_PTR on the top of the stack. */
9931     if (bra == OP_BRAZERO)
9932       {
9933       OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9934       if (extrasize == 2)
9935         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9936       }
9937     else if (bra == OP_BRAMINZERO)
9938       {
9939       OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9940       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9941       }
9942     }
9943   else
9944     {
9945     if (bra == OP_BRA)
9946       {
9947       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9948       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
9949       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
9950       }
9951     else
9952       {
9953       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9954       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
9955       if (extrasize == 2)
9956         {
9957         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9958         if (bra == OP_BRAMINZERO)
9959           OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9960         }
9961       else
9962         {
9963         SLJIT_ASSERT(extrasize == 3);
9964         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
9965         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
9966         }
9967       }
9968     }
9969 
9970   if (bra == OP_BRAZERO)
9971     {
9972     backtrack->matchingpath = LABEL();
9973     SET_LABEL(jump, backtrack->matchingpath);
9974     }
9975   else if (bra == OP_BRAMINZERO)
9976     {
9977     JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
9978     JUMPHERE(brajump);
9979     if (framesize >= 0)
9980       {
9981       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9982       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9983       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
9984       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
9985       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9986       }
9987     set_jumps(backtrack->common.topbacktracks, LABEL());
9988     }
9989   }
9990 else
9991   {
9992   /* AssertNot is successful. */
9993   if (framesize < 0)
9994     {
9995     if (extrasize > 0)
9996       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9997 
9998     if (bra != OP_BRA)
9999       {
10000       if (extrasize == 2)
10001         free_stack(common, 1);
10002       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10003       }
10004     else if (extrasize > 0)
10005       free_stack(common, extrasize);
10006     }
10007   else
10008     {
10009     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10010     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
10011     /* The topmost item should be 0. */
10012     if (bra != OP_BRA)
10013       {
10014       free_stack(common, framesize + extrasize - 1);
10015       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10016       }
10017     else
10018       free_stack(common, framesize + extrasize);
10019     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10020     }
10021 
10022   if (bra == OP_BRAZERO)
10023     backtrack->matchingpath = LABEL();
10024   else if (bra == OP_BRAMINZERO)
10025     {
10026     JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
10027     JUMPHERE(brajump);
10028     }
10029 
10030   if (bra != OP_BRA)
10031     {
10032     SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
10033     set_jumps(backtrack->common.topbacktracks, LABEL());
10034     backtrack->common.topbacktracks = NULL;
10035     }
10036   }
10037 
10038 if (local_quit_available)
10039   {
10040   common->local_quit_available = save_local_quit_available;
10041   common->quit_label = save_quit_label;
10042   common->quit = save_quit;
10043   }
10044 common->in_positive_assertion = save_in_positive_assertion;
10045 common->then_trap = save_then_trap;
10046 common->accept_label = save_accept_label;
10047 common->positive_assertion_quit = save_positive_assertion_quit;
10048 common->accept = save_accept;
10049 return cc + 1 + LINK_SIZE;
10050 }
10051 
match_once_common(compiler_common * common,PCRE2_UCHAR ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)10052 static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
10053 {
10054 DEFINE_COMPILER;
10055 int stacksize;
10056 
10057 if (framesize < 0)
10058   {
10059   if (framesize == no_frame)
10060     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10061   else
10062     {
10063     stacksize = needs_control_head ? 1 : 0;
10064     if (ket != OP_KET || has_alternatives)
10065       stacksize++;
10066 
10067     if (stacksize > 0)
10068       free_stack(common, stacksize);
10069     }
10070 
10071   if (needs_control_head)
10072     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
10073 
10074   /* TMP2 which is set here used by OP_KETRMAX below. */
10075   if (ket == OP_KETRMAX)
10076     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10077   else if (ket == OP_KETRMIN)
10078     {
10079     /* Move the STR_PTR to the private_data_ptr. */
10080     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10081     }
10082   }
10083 else
10084   {
10085   stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
10086   OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
10087   if (needs_control_head)
10088     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10089 
10090   if (ket == OP_KETRMAX)
10091     {
10092     /* TMP2 which is set here used by OP_KETRMAX below. */
10093     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10094     }
10095   }
10096 if (needs_control_head)
10097   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10098 }
10099 
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)10100 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
10101 {
10102 DEFINE_COMPILER;
10103 
10104 if (common->capture_last_ptr != 0)
10105   {
10106   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
10107   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10108   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10109   stacksize++;
10110   }
10111 if (common->optimized_cbracket[offset >> 1] == 0)
10112   {
10113   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
10114   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
10115   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10116   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10117   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10118   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10119   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10120   stacksize += 2;
10121   }
10122 return stacksize;
10123 }
10124 
do_script_run(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10125 static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10126 {
10127   if (PRIV(script_run)(ptr, endptr, FALSE))
10128     return endptr;
10129   return NULL;
10130 }
10131 
10132 #ifdef SUPPORT_UNICODE
10133 
do_script_run_utf(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10134 static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10135 {
10136   if (PRIV(script_run)(ptr, endptr, TRUE))
10137     return endptr;
10138   return NULL;
10139 }
10140 
10141 #endif /* SUPPORT_UNICODE */
10142 
match_script_run_common(compiler_common * common,int private_data_ptr,backtrack_common * parent)10143 static SLJIT_INLINE void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
10144 {
10145 DEFINE_COMPILER;
10146 
10147 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
10148 
10149 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10150 #ifdef SUPPORT_UNICODE
10151 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
10152   common->utf ? SLJIT_FUNC_OFFSET(do_script_run_utf) : SLJIT_FUNC_OFFSET(do_script_run));
10153 #else
10154 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_script_run));
10155 #endif
10156 
10157 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
10158 add_jump(compiler, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
10159 }
10160 
10161 /*
10162   Handling bracketed expressions is probably the most complex part.
10163 
10164   Stack layout naming characters:
10165     S - Push the current STR_PTR
10166     0 - Push a 0 (NULL)
10167     A - Push the current STR_PTR. Needed for restoring the STR_PTR
10168         before the next alternative. Not pushed if there are no alternatives.
10169     M - Any values pushed by the current alternative. Can be empty, or anything.
10170     C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
10171     L - Push the previous local (pointed by localptr) to the stack
10172    () - optional values stored on the stack
10173   ()* - optional, can be stored multiple times
10174 
10175   The following list shows the regular expression templates, their PCRE byte codes
10176   and stack layout supported by pcre-sljit.
10177 
10178   (?:)                     OP_BRA     | OP_KET                A M
10179   ()                       OP_CBRA    | OP_KET                C M
10180   (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
10181                            OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
10182   (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
10183                            OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
10184   ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
10185                            OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
10186   ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
10187                            OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
10188   (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
10189   (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
10190   ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
10191   ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
10192   (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
10193            OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
10194   (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
10195            OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
10196   ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
10197            OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
10198   ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
10199            OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*
10200 
10201 
10202   Stack layout naming characters:
10203     A - Push the alternative index (starting from 0) on the stack.
10204         Not pushed if there are no alternatives.
10205     M - Any values pushed by the current alternative. Can be empty, or anything.
10206 
10207   The next list shows the possible content of a bracket:
10208   (|)     OP_*BRA    | OP_ALT ...         M A
10209   (?()|)  OP_*COND   | OP_ALT             M A
10210   (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
10211                                           Or nothing, if trace is unnecessary
10212 */
10213 
compile_bracket_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)10214 static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10215 {
10216 DEFINE_COMPILER;
10217 backtrack_common *backtrack;
10218 PCRE2_UCHAR opcode;
10219 int private_data_ptr = 0;
10220 int offset = 0;
10221 int i, stacksize;
10222 int repeat_ptr = 0, repeat_length = 0;
10223 int repeat_type = 0, repeat_count = 0;
10224 PCRE2_SPTR ccbegin;
10225 PCRE2_SPTR matchingpath;
10226 PCRE2_SPTR slot;
10227 PCRE2_UCHAR bra = OP_BRA;
10228 PCRE2_UCHAR ket;
10229 assert_backtrack *assert;
10230 BOOL has_alternatives;
10231 BOOL needs_control_head = FALSE;
10232 struct sljit_jump *jump;
10233 struct sljit_jump *skip;
10234 struct sljit_label *rmax_label = NULL;
10235 struct sljit_jump *braminzero = NULL;
10236 
10237 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
10238 
10239 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
10240   {
10241   bra = *cc;
10242   cc++;
10243   opcode = *cc;
10244   }
10245 
10246 opcode = *cc;
10247 ccbegin = cc;
10248 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
10249 ket = *matchingpath;
10250 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
10251   {
10252   repeat_ptr = PRIVATE_DATA(matchingpath);
10253   repeat_length = PRIVATE_DATA(matchingpath + 1);
10254   repeat_type = PRIVATE_DATA(matchingpath + 2);
10255   repeat_count = PRIVATE_DATA(matchingpath + 3);
10256   SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
10257   if (repeat_type == OP_UPTO)
10258     ket = OP_KETRMAX;
10259   if (repeat_type == OP_MINUPTO)
10260     ket = OP_KETRMIN;
10261   }
10262 
10263 matchingpath = ccbegin + 1 + LINK_SIZE;
10264 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
10265 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
10266 cc += GET(cc, 1);
10267 
10268 has_alternatives = *cc == OP_ALT;
10269 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
10270   {
10271   SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
10272     compile_time_checks_must_be_grouped_together);
10273   has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
10274   }
10275 
10276 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
10277   opcode = OP_SCOND;
10278 
10279 if (opcode == OP_CBRA || opcode == OP_SCBRA)
10280   {
10281   /* Capturing brackets has a pre-allocated space. */
10282   offset = GET2(ccbegin, 1 + LINK_SIZE);
10283   if (common->optimized_cbracket[offset] == 0)
10284     {
10285     private_data_ptr = OVECTOR_PRIV(offset);
10286     offset <<= 1;
10287     }
10288   else
10289     {
10290     offset <<= 1;
10291     private_data_ptr = OVECTOR(offset);
10292     }
10293   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10294   matchingpath += IMM2_SIZE;
10295   }
10296 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10297   {
10298   /* Other brackets simply allocate the next entry. */
10299   private_data_ptr = PRIVATE_DATA(ccbegin);
10300   SLJIT_ASSERT(private_data_ptr != 0);
10301   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10302   if (opcode == OP_ONCE)
10303     BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
10304   }
10305 
10306 /* Instructions before the first alternative. */
10307 stacksize = 0;
10308 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10309   stacksize++;
10310 if (bra == OP_BRAZERO)
10311   stacksize++;
10312 
10313 if (stacksize > 0)
10314   allocate_stack(common, stacksize);
10315 
10316 stacksize = 0;
10317 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10318   {
10319   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10320   stacksize++;
10321   }
10322 
10323 if (bra == OP_BRAZERO)
10324   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10325 
10326 if (bra == OP_BRAMINZERO)
10327   {
10328   /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
10329   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10330   if (ket != OP_KETRMIN)
10331     {
10332     free_stack(common, 1);
10333     braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10334     }
10335   else if (opcode == OP_ONCE || opcode >= OP_SBRA)
10336     {
10337     jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10338     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10339     /* Nothing stored during the first run. */
10340     skip = JUMP(SLJIT_JUMP);
10341     JUMPHERE(jump);
10342     /* Checking zero-length iteration. */
10343     if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10344       {
10345       /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
10346       braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10347       }
10348     else
10349       {
10350       /* Except when the whole stack frame must be saved. */
10351       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10352       braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
10353       }
10354     JUMPHERE(skip);
10355     }
10356   else
10357     {
10358     jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10359     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10360     JUMPHERE(jump);
10361     }
10362   }
10363 
10364 if (repeat_type != 0)
10365   {
10366   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
10367   if (repeat_type == OP_EXACT)
10368     rmax_label = LABEL();
10369   }
10370 
10371 if (ket == OP_KETRMIN)
10372   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10373 
10374 if (ket == OP_KETRMAX)
10375   {
10376   rmax_label = LABEL();
10377   if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
10378     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
10379   }
10380 
10381 /* Handling capturing brackets and alternatives. */
10382 if (opcode == OP_ONCE)
10383   {
10384   stacksize = 0;
10385   if (needs_control_head)
10386     {
10387     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10388     stacksize++;
10389     }
10390 
10391   if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10392     {
10393     /* Neither capturing brackets nor recursions are found in the block. */
10394     if (ket == OP_KETRMIN)
10395       {
10396       stacksize += 2;
10397       if (!needs_control_head)
10398         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10399       }
10400     else
10401       {
10402       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10403         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
10404       if (ket == OP_KETRMAX || has_alternatives)
10405         stacksize++;
10406       }
10407 
10408     if (stacksize > 0)
10409       allocate_stack(common, stacksize);
10410 
10411     stacksize = 0;
10412     if (needs_control_head)
10413       {
10414       stacksize++;
10415       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10416       }
10417 
10418     if (ket == OP_KETRMIN)
10419       {
10420       if (needs_control_head)
10421         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10422       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10423       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10424         OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
10425       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10426       }
10427     else if (ket == OP_KETRMAX || has_alternatives)
10428       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10429     }
10430   else
10431     {
10432     if (ket != OP_KET || has_alternatives)
10433       stacksize++;
10434 
10435     stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
10436     allocate_stack(common, stacksize);
10437 
10438     if (needs_control_head)
10439       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10440 
10441     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10442     OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10443 
10444     stacksize = needs_control_head ? 1 : 0;
10445     if (ket != OP_KET || has_alternatives)
10446       {
10447       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10448       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10449       stacksize++;
10450       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10451       }
10452     else
10453       {
10454       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10455       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10456       }
10457     init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
10458     }
10459   }
10460 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
10461   {
10462   /* Saving the previous values. */
10463   if (common->optimized_cbracket[offset >> 1] != 0)
10464     {
10465     SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
10466     allocate_stack(common, 2);
10467     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10468     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
10469     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10470     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
10471     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
10472     }
10473   else
10474     {
10475     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10476     allocate_stack(common, 1);
10477     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10478     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10479     }
10480   }
10481 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10482   {
10483   /* Saving the previous value. */
10484   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10485   allocate_stack(common, 1);
10486   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10487   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10488   }
10489 else if (has_alternatives)
10490   {
10491   /* Pushing the starting string pointer. */
10492   allocate_stack(common, 1);
10493   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10494   }
10495 
10496 /* Generating code for the first alternative. */
10497 if (opcode == OP_COND || opcode == OP_SCOND)
10498   {
10499   if (*matchingpath == OP_CREF)
10500     {
10501     SLJIT_ASSERT(has_alternatives);
10502     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
10503       CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
10504     matchingpath += 1 + IMM2_SIZE;
10505     }
10506   else if (*matchingpath == OP_DNCREF)
10507     {
10508     SLJIT_ASSERT(has_alternatives);
10509 
10510     i = GET2(matchingpath, 1 + IMM2_SIZE);
10511     slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10512     OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
10513     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
10514     OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10515     slot += common->name_entry_size;
10516     i--;
10517     while (i-- > 0)
10518       {
10519       OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10520       OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
10521       slot += common->name_entry_size;
10522       }
10523     OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
10524     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
10525     matchingpath += 1 + 2 * IMM2_SIZE;
10526     }
10527   else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
10528     {
10529     /* Never has other case. */
10530     BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
10531     SLJIT_ASSERT(!has_alternatives);
10532 
10533     if (*matchingpath == OP_TRUE)
10534       {
10535       stacksize = 1;
10536       matchingpath++;
10537       }
10538     else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
10539       stacksize = 0;
10540     else if (*matchingpath == OP_RREF)
10541       {
10542       stacksize = GET2(matchingpath, 1);
10543       if (common->currententry == NULL)
10544         stacksize = 0;
10545       else if (stacksize == RREF_ANY)
10546         stacksize = 1;
10547       else if (common->currententry->start == 0)
10548         stacksize = stacksize == 0;
10549       else
10550         stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10551 
10552       if (stacksize != 0)
10553         matchingpath += 1 + IMM2_SIZE;
10554       }
10555     else
10556       {
10557       if (common->currententry == NULL || common->currententry->start == 0)
10558         stacksize = 0;
10559       else
10560         {
10561         stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
10562         slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10563         i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10564         while (stacksize > 0)
10565           {
10566           if ((int)GET2(slot, 0) == i)
10567             break;
10568           slot += common->name_entry_size;
10569           stacksize--;
10570           }
10571         }
10572 
10573       if (stacksize != 0)
10574         matchingpath += 1 + 2 * IMM2_SIZE;
10575       }
10576 
10577       /* The stacksize == 0 is a common "else" case. */
10578       if (stacksize == 0)
10579         {
10580         if (*cc == OP_ALT)
10581           {
10582           matchingpath = cc + 1 + LINK_SIZE;
10583           cc += GET(cc, 1);
10584           }
10585         else
10586           matchingpath = cc;
10587         }
10588     }
10589   else
10590     {
10591     SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
10592     /* Similar code as PUSH_BACKTRACK macro. */
10593     assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
10594     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10595       return NULL;
10596     memset(assert, 0, sizeof(assert_backtrack));
10597     assert->common.cc = matchingpath;
10598     BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
10599     matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
10600     }
10601   }
10602 
10603 compile_matchingpath(common, matchingpath, cc, backtrack);
10604 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10605   return NULL;
10606 
10607 if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
10608   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10609 
10610 if (opcode == OP_ONCE)
10611   match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
10612 
10613 if (opcode == OP_SCRIPT_RUN)
10614   match_script_run_common(common, private_data_ptr, backtrack);
10615 
10616 stacksize = 0;
10617 if (repeat_type == OP_MINUPTO)
10618   {
10619   /* We need to preserve the counter. TMP2 will be used below. */
10620   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10621   stacksize++;
10622   }
10623 if (ket != OP_KET || bra != OP_BRA)
10624   stacksize++;
10625 if (offset != 0)
10626   {
10627   if (common->capture_last_ptr != 0)
10628     stacksize++;
10629   if (common->optimized_cbracket[offset >> 1] == 0)
10630     stacksize += 2;
10631   }
10632 if (has_alternatives && opcode != OP_ONCE)
10633   stacksize++;
10634 
10635 if (stacksize > 0)
10636   allocate_stack(common, stacksize);
10637 
10638 stacksize = 0;
10639 if (repeat_type == OP_MINUPTO)
10640   {
10641   /* TMP2 was set above. */
10642   OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
10643   stacksize++;
10644   }
10645 
10646 if (ket != OP_KET || bra != OP_BRA)
10647   {
10648   if (ket != OP_KET)
10649     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10650   else
10651     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10652   stacksize++;
10653   }
10654 
10655 if (offset != 0)
10656   stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
10657 
10658 /* Skip and count the other alternatives. */
10659 i = 1;
10660 while (*cc == OP_ALT)
10661   {
10662   cc += GET(cc, 1);
10663   i++;
10664   }
10665 
10666 if (has_alternatives)
10667   {
10668   if (opcode != OP_ONCE)
10669     {
10670     if (i <= 3)
10671       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10672     else
10673       BACKTRACK_AS(bracket_backtrack)->u.matching_put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
10674     }
10675   if (ket != OP_KETRMAX)
10676     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10677   }
10678 
10679 /* Must be after the matchingpath label. */
10680 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
10681   {
10682   SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
10683   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10684   }
10685 
10686 if (ket == OP_KETRMAX)
10687   {
10688   if (repeat_type != 0)
10689     {
10690     if (has_alternatives)
10691       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10692     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10693     JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10694     /* Drop STR_PTR for greedy plus quantifier. */
10695     if (opcode != OP_ONCE)
10696       free_stack(common, 1);
10697     }
10698   else if (opcode < OP_BRA || opcode >= OP_SBRA)
10699     {
10700     if (has_alternatives)
10701       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10702 
10703     /* Checking zero-length iteration. */
10704     if (opcode != OP_ONCE)
10705       {
10706       /* This case includes opcodes such as OP_SCRIPT_RUN. */
10707       CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
10708       /* Drop STR_PTR for greedy plus quantifier. */
10709       if (bra != OP_BRAZERO)
10710         free_stack(common, 1);
10711       }
10712     else
10713       /* TMP2 must contain the starting STR_PTR. */
10714       CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
10715     }
10716   else
10717     JUMPTO(SLJIT_JUMP, rmax_label);
10718   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10719   }
10720 
10721 if (repeat_type == OP_EXACT)
10722   {
10723   count_match(common);
10724   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10725   JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10726   }
10727 else if (repeat_type == OP_UPTO)
10728   {
10729   /* We need to preserve the counter. */
10730   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10731   allocate_stack(common, 1);
10732   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10733   }
10734 
10735 if (bra == OP_BRAZERO)
10736   BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
10737 
10738 if (bra == OP_BRAMINZERO)
10739   {
10740   /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
10741   JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
10742   if (braminzero != NULL)
10743     {
10744     JUMPHERE(braminzero);
10745     /* We need to release the end pointer to perform the
10746     backtrack for the zero-length iteration. When
10747     framesize is < 0, OP_ONCE will do the release itself. */
10748     if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
10749       {
10750       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10751       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10752       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
10753       }
10754     else if (ket == OP_KETRMIN && opcode != OP_ONCE)
10755       free_stack(common, 1);
10756     }
10757   /* Continue to the normal backtrack. */
10758   }
10759 
10760 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
10761   count_match(common);
10762 
10763 cc += 1 + LINK_SIZE;
10764 
10765 if (opcode == OP_ONCE)
10766   {
10767   /* We temporarily encode the needs_control_head in the lowest bit.
10768      Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
10769      the same value for small signed numbers (including negative numbers). */
10770   BACKTRACK_AS(bracket_backtrack)->u.framesize = (int)((unsigned)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
10771   }
10772 return cc + repeat_length;
10773 }
10774 
/* Emits the matching path for an OP_BRAPOS, OP_SBRAPOS, OP_CBRAPOS or
OP_SCBRAPOS group, optionally preceded by OP_BRAPOSZERO.

Arguments:
  common   compiler state; capture_last_ptr, control_head_ptr and
           optimized_cbracket are read here
  cc       points at the group opcode, or at a leading OP_BRAPOSZERO
  parent   not referenced directly in this body; presumably consumed by
           the PUSH_BACKTRACK macro - confirm against the macro

Returns:   cc + 1 + LINK_SIZE for the closing OP_KETRPOS, or NULL when
           sljit_get_compiler_error() reports an error after compiling
           an alternative or its backtracking path.

Outline of the generated code:
  1. Stack slots are allocated and the values to be preserved are stored
     (two layouts, selected by the sign of framesize).
  2. Starting at the `loop` label each alternative is compiled; after a
     successful alternative the recorded position is updated and control
     jumps back to `loop`. The backtracking path of each alternative
     restores STR_PTR to the last recorded position.
  3. Unless OP_BRAPOSZERO was present, a flag slot (initialised to 1 and
     cleared after a successful iteration) is tested after the last
     alternative; a non-zero flag records a jump in topbacktracks. */
compile_bracketpos_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)10775 static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10776 {
10777 DEFINE_COMPILER;
10778 backtrack_common *backtrack;
10779 PCRE2_UCHAR opcode;
10780 int private_data_ptr;
10781 int cbraprivptr = 0;
10782 BOOL needs_control_head;
10783 int framesize;
10784 int stacksize;
10785 int offset = 0;
10786 BOOL zero = FALSE;
10787 PCRE2_SPTR ccbegin = NULL;
10788 int stack; /* Also contains the offset of control head. */
10789 struct sljit_label *loop = NULL;
10790 struct jump_list *emptymatch = NULL;
10791 
      /* NOTE(review): `backtrack` is never assigned explicitly in this body;
      presumably PUSH_BACKTRACK sets it - confirm against the macro. */
10792 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
      /* A leading OP_BRAPOSZERO is skipped and remembered in `zero`. */
10793 if (*cc == OP_BRAPOSZERO)
10794   {
10795   zero = TRUE;
10796   cc++;
10797   }
10798 
10799 opcode = *cc;
10800 private_data_ptr = PRIVATE_DATA(cc);
10801 SLJIT_ASSERT(private_data_ptr != 0);
10802 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
      /* Select the start of the first alternative (ccbegin). The capturing
      forms also read the 2-unit value stored after the link (presumably the
      capture group number) and double it, so that offset and offset + 1
      address an OVECTOR pair. */
10803 switch(opcode)
10804   {
10805   case OP_BRAPOS:
10806   case OP_SBRAPOS:
10807   ccbegin = cc + 1 + LINK_SIZE;
10808   break;
10809 
10810   case OP_CBRAPOS:
10811   case OP_SCBRAPOS:
10812   offset = GET2(cc, 1 + LINK_SIZE);
10813   /* This case cannot be optimized in the same way as
10814   normal capturing brackets. */
10815   SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
10816   cbraprivptr = OVECTOR_PRIV(offset);
10817   offset <<= 1;
10818   ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
10819   break;
10820 
10821   default:
10822   SLJIT_UNREACHABLE();
10823   break;
10824   }
10825 
      /* A negative framesize selects the frameless layout; otherwise a frame
      is set up with init_frame(). */
10826 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
10827 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
10828 if (framesize < 0)
10829   {
        /* Count the slots: saved capture data (2 or 3) or saved STR_PTR (1),
        plus one for the control head and one for the flag when needed. */
10830   if (offset != 0)
10831     {
10832     stacksize = 2;
10833     if (common->capture_last_ptr != 0)
10834       stacksize++;
10835     }
10836   else
10837     stacksize = 1;
10838 
10839   if (needs_control_head)
10840     stacksize++;
10841   if (!zero)
10842     stacksize++;
10843 
10844   BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
10845   allocate_stack(common, stacksize);
10846   if (framesize == no_frame)
10847     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
10848 
        /* Slot layout written below:
           offset != 0: STACK(0) / STACK(1) = current OVECTOR(offset) /
                        OVECTOR(offset + 1), STACK(2) = value at
                        capture_last_ptr when that is in use;
           offset == 0: STACK(0) = STR_PTR.
        The control head (if needed) follows, and the flag slot (when !zero)
        comes last, i.e. at STACK(stacksize - 1). */
10849   stack = 0;
10850   if (offset != 0)
10851     {
10852     stack = 2;
10853     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
10854     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
10855     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
10856     if (common->capture_last_ptr != 0)
10857       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
10858     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
10859     if (needs_control_head)
10860       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10861     if (common->capture_last_ptr != 0)
10862       {
10863       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
10864       stack = 3;
10865       }
10866     }
10867   else
10868     {
10869     if (needs_control_head)
10870       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10871     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10872     stack = 1;
10873     }
10874 
        /* The flag is written one slot above the control head; `stack` is
        then stepped back so that it keeps the control head's slot index for
        the loop below. */
10875   if (needs_control_head)
10876     stack++;
10877   if (!zero)
10878     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
10879   if (needs_control_head)
10880     {
10881     stack--;
10882     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
10883     }
10884   }
10885 else
10886   {
10887   stacksize = framesize + 1;
10888   if (!zero)
10889     stacksize++;
10890   if (needs_control_head)
10891     stacksize++;
10892   if (offset == 0)
10893     stacksize++;
10894   BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
10895 
10896   allocate_stack(common, stacksize);
        /* TMP1 keeps the previous content of the private data slot; the slot
        itself is redirected to STACK_TOP + stacksize * sizeof(sljit_sw). */
10897   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10898   if (needs_control_head)
10899     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10900   OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10901 
        /* Slot layout written below, in order: flag = 1 (when !zero),
        control head (when needed), STR_PTR (when offset == 0), previous
        value of the private data slot; init_frame() is called afterwards. */
10902   stack = 0;
10903   if (!zero)
10904     {
10905     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
10906     stack = 1;
10907     }
10908   if (needs_control_head)
10909     {
10910     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
10911     stack++;
10912     }
10913   if (offset == 0)
10914     {
10915     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
10916     stack++;
10917     }
10918   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
10919   init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
        /* Step `stack` back to the index used for the control head above
        (it is only read later when needs_control_head is set). */
10920   stack -= 1 + (offset == 0);
10921   }
10922 
      /* Capturing groups keep the start of the current iteration in the
      cbraprivptr slot. */
10923 if (offset != 0)
10924   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
10925 
      /* Every successful alternative jumps back here for another iteration. */
10926 loop = LABEL();
10927 while (*cc != OP_KETRPOS)
10928   {
10929   backtrack->top = NULL;
10930   backtrack->topbacktracks = NULL;
        /* Advance cc by the link value stored at cc + 1; the code below
        treats the new position as either OP_KETRPOS or the opcode that
        precedes the next alternative. */
10931   cc += GET(cc, 1);
10932 
10933   compile_matchingpath(common, ccbegin, cc, backtrack);
10934   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10935     return NULL;
10936 
        /* Code executed after the alternative has matched. */
10937   if (framesize < 0)
10938     {
10939     if (framesize == no_frame)
10940       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10941 
10942     if (offset != 0)
10943       {
            /* Publish the capture: OVECTOR(offset) receives the start kept
            in cbraprivptr, OVECTOR(offset + 1) the current STR_PTR, and
            cbraprivptr becomes the start of the next iteration. TMP1 still
            holds the old start for the empty-match test below. */
10944       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
10945       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10946       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
10947       if (common->capture_last_ptr != 0)
10948         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10949       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10950       }
10951     else
10952       {
            /* STACK(0) records the position reached by the last successful
            iteration; for OP_SBRAPOS the previous value is kept in TMP1 for
            the empty-match test below. */
10953       if (opcode == OP_SBRAPOS)
10954         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10955       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10956       }
10957 
10958     /* Even if the match is empty, we need to reset the control head. */
10959     if (needs_control_head)
10960       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
10961 
          /* The S-variants leave the loop through emptymatch when the
          iteration did not move STR_PTR. */
10962     if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
10963       add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
10964 
          /* An iteration matched: clear the flag slot. */
10965     if (!zero)
10966       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
10967     }
10968   else
10969     {
10970     if (offset != 0)
10971       {
            /* STACK_TOP is recomputed from the private data slot, then the
            capture is published exactly as in the frameless case. */
10972       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10973       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
10974       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10975       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
10976       if (common->capture_last_ptr != 0)
10977         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10978       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10979       }
10980     else
10981       {
            /* The saved STR_PTR slot is addressed relative to the value in
            the private data slot (TMP2) at STACK(-framesize - 2). */
10982       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10983       OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10984       if (opcode == OP_SBRAPOS)
10985         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
10986       OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
10987       }
10988 
10989     /* Even if the match is empty, we need to reset the control head. */
10990     if (needs_control_head)
10991       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
10992 
10993     if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
10994       add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
10995 
10996     if (!zero)
10997       {
            /* framesize >= 0 in this branch, so the inner test below is
            never true and only the else arm (flag at STACK(0)) is used. */
10998       if (framesize < 0)
10999         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
11000       else
11001         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
11002       }
11003     }
11004 
11005   JUMPTO(SLJIT_JUMP, loop);
11006   flush_stubs(common);
11007 
        /* Backtracking code of this alternative. Its collected jumps land
        on the label below, where STR_PTR is restored to the last recorded
        position. */
11008   compile_backtrackingpath(common, backtrack->top);
11009   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11010     return NULL;
11011   set_jumps(backtrack->topbacktracks, LABEL());
11012 
11013   if (framesize < 0)
11014     {
11015     if (offset != 0)
11016       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11017     else
11018       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11019     }
11020   else
11021     {
11022     if (offset != 0)
11023       {
11024       /* Last alternative. */
            /* TMP2 is loaded here because the flag test after the loop
            addresses the flag slot through TMP2. */
11025       if (*cc == OP_KETRPOS)
11026         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11027       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11028       }
11029     else
11030       {
11031       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11032       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
11033       }
11034     }
11035 
11036   if (*cc == OP_KETRPOS)
11037     break;
11038   ccbegin = cc + 1 + LINK_SIZE;
11039   }
11040 
11041 /* We don't have to restore the control head in case of a failed match. */
11042 
      /* When !zero, a flag that is still non-zero means no iteration has
      matched; that case is recorded as a jump in topbacktracks. */
11043 backtrack->topbacktracks = NULL;
11044 if (!zero)
11045   {
11046   if (framesize < 0)
11047     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
11048   else /* TMP2 is set to [private_data_ptr] above. */
11049     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
11050   }
11051 
11052 /* None of them matched. */
11053 set_jumps(emptymatch, LABEL());
11054 count_match(common);
11055 return cc + 1 + LINK_SIZE;
11056 }
11057 
get_iterator_parameters(compiler_common * common,PCRE2_SPTR cc,PCRE2_UCHAR * opcode,PCRE2_UCHAR * type,sljit_u32 * max,sljit_u32 * exact,PCRE2_SPTR * end)11058 static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
11059 {
11060 int class_len;
11061 
11062 *opcode = *cc;
11063 *exact = 0;
11064 
11065 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
11066   {
11067   cc++;
11068   *type = OP_CHAR;
11069   }
11070 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
11071   {
11072   cc++;
11073   *type = OP_CHARI;
11074   *opcode -= OP_STARI - OP_STAR;
11075   }
11076 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
11077   {
11078   cc++;
11079   *type = OP_NOT;
11080   *opcode -= OP_NOTSTAR - OP_STAR;
11081   }
11082 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
11083   {
11084   cc++;
11085   *type = OP_NOTI;
11086   *opcode -= OP_NOTSTARI - OP_STAR;
11087   }
11088 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
11089   {
11090   cc++;
11091   *opcode -= OP_TYPESTAR - OP_STAR;
11092   *type = OP_END;
11093   }
11094 else
11095   {
11096   SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
11097   *type = *opcode;
11098   cc++;
11099   class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
11100   *opcode = cc[class_len - 1];
11101 
11102   if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
11103     {
11104     *opcode -= OP_CRSTAR - OP_STAR;
11105     *end = cc + class_len;
11106 
11107     if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
11108       {
11109       *exact = 1;
11110       *opcode -= OP_PLUS - OP_STAR;
11111       }
11112     }
11113   else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
11114     {
11115     *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
11116     *end = cc + class_len;
11117 
11118     if (*opcode == OP_POSPLUS)
11119       {
11120       *exact = 1;
11121       *opcode = OP_POSSTAR;
11122       }
11123     }
11124   else
11125     {
11126     SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
11127     *max = GET2(cc, (class_len + IMM2_SIZE));
11128     *exact = GET2(cc, class_len);
11129 
11130     if (*max == 0)
11131       {
11132       if (*opcode == OP_CRPOSRANGE)
11133         *opcode = OP_POSSTAR;
11134       else
11135         *opcode -= OP_CRRANGE - OP_STAR;
11136       }
11137     else
11138       {
11139       *max -= *exact;
11140       if (*max == 0)
11141         *opcode = OP_EXACT;
11142       else if (*max == 1)
11143         {
11144         if (*opcode == OP_CRPOSRANGE)
11145           *opcode = OP_POSQUERY;
11146         else
11147           *opcode -= OP_CRRANGE - OP_QUERY;
11148         }
11149       else
11150         {
11151         if (*opcode == OP_CRPOSRANGE)
11152           *opcode = OP_POSUPTO;
11153         else
11154           *opcode -= OP_CRRANGE - OP_UPTO;
11155         }
11156       }
11157     *end = cc + class_len + 2 * IMM2_SIZE;
11158     }
11159   return cc;
11160   }
11161 
11162 switch(*opcode)
11163   {
11164   case OP_EXACT:
11165   *exact = GET2(cc, 0);
11166   cc += IMM2_SIZE;
11167   break;
11168 
11169   case OP_PLUS:
11170   case OP_MINPLUS:
11171   *exact = 1;
11172   *opcode -= OP_PLUS - OP_STAR;
11173   break;
11174 
11175   case OP_POSPLUS:
11176   *exact = 1;
11177   *opcode = OP_POSSTAR;
11178   break;
11179 
11180   case OP_UPTO:
11181   case OP_MINUPTO:
11182   case OP_POSUPTO:
11183   *max = GET2(cc, 0);
11184   cc += IMM2_SIZE;
11185   break;
11186   }
11187 
11188 if (*type == OP_END)
11189   {
11190   *type = *cc;
11191   *end = next_opcode(common, cc);
11192   cc++;
11193   return cc;
11194   }
11195 
11196 *end = cc + 1;
11197 #ifdef SUPPORT_UNICODE
11198 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
11199 #endif
11200 return cc;
11201 }
11202 
compile_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11203 static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11204 {
11205 DEFINE_COMPILER;
11206 backtrack_common *backtrack;
11207 PCRE2_UCHAR opcode;
11208 PCRE2_UCHAR type;
11209 sljit_u32 max = 0, exact;
11210 sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
11211 sljit_s32 early_fail_type;
11212 BOOL charpos_enabled;
11213 PCRE2_UCHAR charpos_char;
11214 unsigned int charpos_othercasebit;
11215 PCRE2_SPTR end;
11216 jump_list *no_match = NULL;
11217 jump_list *no_char1_match = NULL;
11218 struct sljit_jump *jump = NULL;
11219 struct sljit_label *label;
11220 int private_data_ptr = PRIVATE_DATA(cc);
11221 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
11222 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
11223 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
11224 int tmp_base, tmp_offset;
11225 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11226 BOOL use_tmp;
11227 #endif
11228 
11229 PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
11230 
11231 early_fail_type = (early_fail_ptr & 0x7);
11232 early_fail_ptr >>= 3;
11233 
11234 /* During recursion, these optimizations are disabled. */
11235 if (common->early_fail_start_ptr == 0)
11236   {
11237   early_fail_ptr = 0;
11238   early_fail_type = type_skip;
11239   }
11240 
11241 SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
11242   || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));
11243 
11244 if (early_fail_type == type_fail)
11245   add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
11246 
11247 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
11248 
11249 if (type != OP_EXTUNI)
11250   {
11251   tmp_base = TMP3;
11252   tmp_offset = 0;
11253   }
11254 else
11255   {
11256   tmp_base = SLJIT_MEM1(SLJIT_SP);
11257   tmp_offset = POSSESSIVE0;
11258   }
11259 
11260 /* Handle fixed part first. */
11261 if (exact > 1)
11262   {
11263   SLJIT_ASSERT(early_fail_ptr == 0);
11264 
11265   if (common->mode == PCRE2_JIT_COMPLETE
11266 #ifdef SUPPORT_UNICODE
11267       && !common->utf
11268 #endif
11269       && type != OP_ANYNL && type != OP_EXTUNI)
11270     {
11271     OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
11272     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
11273     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11274     label = LABEL();
11275     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11276     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11277     JUMPTO(SLJIT_NOT_ZERO, label);
11278     }
11279   else
11280     {
11281     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11282     label = LABEL();
11283     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11284     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11285     JUMPTO(SLJIT_NOT_ZERO, label);
11286     }
11287   }
11288 else if (exact == 1)
11289   {
11290   compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11291 
11292   if (early_fail_type == type_fail_range)
11293     {
11294     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
11295     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw));
11296     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
11297     OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
11298     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
11299 
11300     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw), STR_PTR, 0);
11301     }
11302   }
11303 
11304 switch(opcode)
11305   {
11306   case OP_STAR:
11307   case OP_UPTO:
11308   SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR);
11309 
11310   if (type == OP_ANYNL || type == OP_EXTUNI)
11311     {
11312     SLJIT_ASSERT(private_data_ptr == 0);
11313     SLJIT_ASSERT(early_fail_ptr == 0);
11314 
11315     allocate_stack(common, 2);
11316     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11317     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
11318 
11319     if (opcode == OP_UPTO)
11320       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
11321 
11322     label = LABEL();
11323     compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11324     if (opcode == OP_UPTO)
11325       {
11326       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
11327       OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11328       jump = JUMP(SLJIT_ZERO);
11329       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
11330       }
11331 
11332     /* We cannot use TMP3 because of allocate_stack. */
11333     allocate_stack(common, 1);
11334     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11335     JUMPTO(SLJIT_JUMP, label);
11336     if (jump != NULL)
11337       JUMPHERE(jump);
11338     BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11339     break;
11340     }
11341 #ifdef SUPPORT_UNICODE
11342   else if (type == OP_ALLANY && !common->invalid_utf)
11343 #else
11344   else if (type == OP_ALLANY)
11345 #endif
11346     {
11347     if (opcode == OP_STAR)
11348       {
11349       if (private_data_ptr == 0)
11350         allocate_stack(common, 2);
11351 
11352       OP1(SLJIT_MOV, base, offset0, STR_END, 0);
11353       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11354 
11355       OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11356       process_partial_match(common);
11357 
11358       if (early_fail_ptr != 0)
11359         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11360       BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11361       break;
11362       }
11363 #ifdef SUPPORT_UNICODE
11364     else if (!common->utf)
11365 #else
11366     else
11367 #endif
11368       {
11369       if (private_data_ptr == 0)
11370         allocate_stack(common, 2);
11371 
11372       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11373       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11374 
11375       if (common->mode == PCRE2_JIT_COMPLETE)
11376         {
11377         OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
11378         CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11379         }
11380       else
11381         {
11382         jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11383         process_partial_match(common);
11384         JUMPHERE(jump);
11385         }
11386 
11387       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11388 
11389       if (early_fail_ptr != 0)
11390         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11391       BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11392       break;
11393       }
11394     }
11395 
11396   charpos_enabled = FALSE;
11397   charpos_char = 0;
11398   charpos_othercasebit = 0;
11399 
11400   if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
11401     {
11402 #ifdef SUPPORT_UNICODE
11403     charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
11404 #else
11405     charpos_enabled = TRUE;
11406 #endif
11407     if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
11408       {
11409       charpos_othercasebit = char_get_othercase_bit(common, end + 1);
11410       if (charpos_othercasebit == 0)
11411         charpos_enabled = FALSE;
11412       }
11413 
11414     if (charpos_enabled)
11415       {
11416       charpos_char = end[1];
11417       /* Consume the OP_CHAR opcode. */
11418       end += 2;
11419 #if PCRE2_CODE_UNIT_WIDTH == 8
11420       SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
11421 #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
11422       SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
11423       if ((charpos_othercasebit & 0x100) != 0)
11424         charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
11425 #endif
11426       if (charpos_othercasebit != 0)
11427         charpos_char |= charpos_othercasebit;
11428 
11429       BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
11430       BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
11431       BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
11432       }
11433     }
11434 
11435   if (charpos_enabled)
11436     {
11437     if (opcode == OP_UPTO)
11438       OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
11439 
11440     /* Search the first instance of charpos_char. */
11441     jump = JUMP(SLJIT_JUMP);
11442     label = LABEL();
11443     if (opcode == OP_UPTO)
11444       {
11445       OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11446       add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
11447       }
11448     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11449     if (early_fail_ptr != 0)
11450       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11451     JUMPHERE(jump);
11452 
11453     detect_partial_match(common, &backtrack->topbacktracks);
11454     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11455     if (charpos_othercasebit != 0)
11456       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11457     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11458 
11459     if (private_data_ptr == 0)
11460       allocate_stack(common, 2);
11461     OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11462     OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11463 
11464     if (opcode == OP_UPTO)
11465       {
11466       OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11467       add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11468       }
11469 
11470     /* Search the last instance of charpos_char. */
11471     label = LABEL();
11472     compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11473     if (early_fail_ptr != 0)
11474       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11475     detect_partial_match(common, &no_match);
11476     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11477     if (charpos_othercasebit != 0)
11478       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11479 
11480     if (opcode == OP_STAR)
11481       {
11482       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11483       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11484       JUMPTO(SLJIT_JUMP, label);
11485       }
11486     else
11487       {
11488       jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
11489       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11490       JUMPHERE(jump);
11491       OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11492       JUMPTO(SLJIT_NOT_ZERO, label);
11493       }
11494 
11495     set_jumps(no_match, LABEL());
11496     OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1));
11497     OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11498     }
11499   else
11500     {
11501     if (private_data_ptr == 0)
11502       allocate_stack(common, 2);
11503 
11504     OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11505 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11506     use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR);
11507     SLJIT_ASSERT(!use_tmp || tmp_base == TMP3);
11508 
11509     if (common->utf)
11510       OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11511 #endif
11512     if (opcode == OP_UPTO)
11513       OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11514 
11515     detect_partial_match(common, &no_match);
11516     label = LABEL();
11517     compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11518 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11519     if (common->utf)
11520       OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11521 #endif
11522 
11523     if (opcode == OP_UPTO)
11524       {
11525       OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11526       add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11527       }
11528 
11529     detect_partial_match_to(common, label);
11530     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11531 
11532     set_jumps(no_char1_match, LABEL());
11533 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11534     if (common->utf)
11535       {
11536       set_jumps(no_match, LABEL());
11537       if (use_tmp)
11538         {
11539         OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
11540         OP1(SLJIT_MOV, base, offset0, TMP3, 0);
11541         }
11542       else
11543         OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11544       }
11545     else
11546 #endif
11547       {
11548       OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11549       set_jumps(no_match, LABEL());
11550       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11551       }
11552 
11553     if (early_fail_ptr != 0)
11554       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11555     }
11556 
11557   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11558   break;
11559 
11560   case OP_MINSTAR:
11561   if (private_data_ptr == 0)
11562     allocate_stack(common, 1);
11563   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11564   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11565   if (early_fail_ptr != 0)
11566     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11567   break;
11568 
11569   case OP_MINUPTO:
11570   SLJIT_ASSERT(early_fail_ptr == 0);
11571   if (private_data_ptr == 0)
11572     allocate_stack(common, 2);
11573   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11574   OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
11575   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11576   break;
11577 
11578   case OP_QUERY:
11579   case OP_MINQUERY:
11580   SLJIT_ASSERT(early_fail_ptr == 0);
11581   if (private_data_ptr == 0)
11582     allocate_stack(common, 1);
11583   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11584   if (opcode == OP_QUERY)
11585     compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11586   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11587   break;
11588 
11589   case OP_EXACT:
11590   break;
11591 
11592   case OP_POSSTAR:
11593 #if defined SUPPORT_UNICODE
11594   if (type == OP_ALLANY && !common->invalid_utf)
11595 #else
11596   if (type == OP_ALLANY)
11597 #endif
11598     {
11599     OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11600     process_partial_match(common);
11601     if (early_fail_ptr != 0)
11602       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11603     break;
11604     }
11605 
11606 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11607   if (common->utf)
11608     {
11609     OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11610     detect_partial_match(common, &no_match);
11611     label = LABEL();
11612     compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11613     OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11614     detect_partial_match_to(common, label);
11615 
11616     set_jumps(no_match, LABEL());
11617     OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11618     if (early_fail_ptr != 0)
11619       {
11620       if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3)
11621         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0);
11622       else
11623         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11624       }
11625     break;
11626     }
11627 #endif
11628 
11629   detect_partial_match(common, &no_match);
11630   label = LABEL();
11631   compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11632   detect_partial_match_to(common, label);
11633   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11634 
11635   set_jumps(no_char1_match, LABEL());
11636   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11637   set_jumps(no_match, LABEL());
11638   if (early_fail_ptr != 0)
11639     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11640   break;
11641 
11642   case OP_POSUPTO:
11643   SLJIT_ASSERT(early_fail_ptr == 0);
11644 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11645   if (common->utf)
11646     {
11647     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11648     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11649 
11650     detect_partial_match(common, &no_match);
11651     label = LABEL();
11652     compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11653     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11654     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11655     add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11656     detect_partial_match_to(common, label);
11657 
11658     set_jumps(no_match, LABEL());
11659     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
11660     break;
11661     }
11662 #endif
11663 
11664   if (type == OP_ALLANY)
11665     {
11666     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11667 
11668     if (common->mode == PCRE2_JIT_COMPLETE)
11669       {
11670       OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
11671       CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11672       }
11673     else
11674       {
11675       jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11676       process_partial_match(common);
11677       JUMPHERE(jump);
11678       }
11679     break;
11680     }
11681 
11682   OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11683 
11684   detect_partial_match(common, &no_match);
11685   label = LABEL();
11686   compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11687   OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11688   add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11689   detect_partial_match_to(common, label);
11690   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11691 
11692   set_jumps(no_char1_match, LABEL());
11693   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11694   set_jumps(no_match, LABEL());
11695   break;
11696 
11697   case OP_POSQUERY:
11698   SLJIT_ASSERT(early_fail_ptr == 0);
11699   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11700   compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11701   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11702   set_jumps(no_match, LABEL());
11703   OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11704   break;
11705 
11706   default:
11707   SLJIT_UNREACHABLE();
11708   break;
11709   }
11710 
11711 count_match(common);
11712 return end;
11713 }
11714 
/* Emits the matching-path code for OP_FAIL, OP_ACCEPT and OP_ASSERT_ACCEPT.

   common  compiler state (accept lists/labels, options, recursion entry)
   cc      points at the opcode being compiled
   parent  backtrack record that the new backtrack entry is pushed onto

   Returns cc + 1. PUSH_BACKTRACK is given NULL as its last argument, so it
   presumably makes this function return NULL on an allocation/compiler
   error (the macro body is not visible here - confirm). */
compile_fail_accept_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11715 static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11716 {
11717 DEFINE_COMPILER;
11718 backtrack_common *backtrack;
11719 
11720 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11721 
      /* OP_FAIL: always jump to this item's own backtrack list. */
11722 if (*cc == OP_FAIL)
11723   {
11724   add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11725   return cc + 1;
11726   }
11727 
      /* OP_ACCEPT outside a recursion entry with PCRE2_ENDANCHORED: the match
         is only acceptable when STR_PTR equals STR_END; otherwise the jump is
         queued on common->reset_match. */
11728 if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
11729   add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
11730 
11731 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
11732   {
11733   /* No need to check notempty conditions. */
        /* Jump straight to the accept label if it already exists, otherwise
           queue the jump on the common->accept list. */
11734   if (common->accept_label == NULL)
11735     add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
11736   else
11737     JUMPTO(SLJIT_JUMP, common->accept_label);
11738   return cc + 1;
11739   }
11740 
      /* STR_PTR differs from the value stored in OVECTOR(0) (presumably the
         match start, i.e. the match is non-empty): accept immediately. */
11741 if (common->accept_label == NULL)
11742   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
11743 else
11744   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
11745 
      /* Load the run-time options word into TMP2. With virtual registers the
         ARGUMENTS value is first copied into TMP1; TMP1 is reused below to
         read jit_arguments.str. */
11746 if (HAS_VIRTUAL_REGISTERS)
11747   {
11748   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11749   OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
11750   }
11751 else
11752   OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));
11753 
      /* PCRE2_NOTEMPTY set: backtrack. */
11754 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
11755 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO));
      /* PCRE2_NOTEMPTY_ATSTART not set either: accept. */
11756 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
11757 if (common->accept_label == NULL)
11758   add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
11759 else
11760   JUMPTO(SLJIT_ZERO, common->accept_label);
11761 
      /* PCRE2_NOTEMPTY_ATSTART is set: accept only if STR_PTR is not equal to
         jit_arguments.str; otherwise fall through to the backtrack jump. */
11762 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
11763 if (common->accept_label == NULL)
11764   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
11765 else
11766   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
11767 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11768 return cc + 1;
11769 }
11770 
compile_close_matchingpath(compiler_common * common,PCRE2_SPTR cc)11771 static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
11772 {
11773 DEFINE_COMPILER;
11774 int offset = GET2(cc, 1);
11775 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
11776 
11777 /* Data will be discarded anyway... */
11778 if (common->currententry != NULL)
11779   return cc + 1 + IMM2_SIZE;
11780 
11781 if (!optimized_cbracket)
11782   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
11783 offset <<= 1;
11784 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11785 if (!optimized_cbracket)
11786   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11787 return cc + 1 + IMM2_SIZE;
11788 }
11789 
compile_control_verb_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11790 static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11791 {
11792 DEFINE_COMPILER;
11793 backtrack_common *backtrack;
11794 PCRE2_UCHAR opcode = *cc;
11795 PCRE2_SPTR ccend = cc + 1;
11796 
11797 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
11798     opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
11799   ccend += 2 + cc[1];
11800 
11801 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11802 
11803 if (opcode == OP_SKIP)
11804   {
11805   allocate_stack(common, 1);
11806   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11807   return ccend;
11808   }
11809 
11810 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
11811   {
11812   if (HAS_VIRTUAL_REGISTERS)
11813     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11814   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
11815   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
11816   OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
11817   }
11818 
11819 return ccend;
11820 }
11821 
/* One-element opcode buffer containing OP_THEN_TRAP. The then-trap backtrack
   records created in compile_then_trap_matchingpath() and
   compile_matchingpath() point their common.cc field at this buffer. */
11822 static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
11823 
/* Pushes a then_trap_backtrack record for the range [cc, ccend) and emits the
   code that links a new "then trap" entry into the control head chain.

   common  compiler state (then_trap, control_head_ptr, start)
   cc      first opcode of the range
   ccend   end of the range
   parent  backtrack record the new entry is pushed onto (used by the
           PUSH_BACKTRACK_NOVALUE macro, whose body is not visible here)

   Stack slots written, counted from the newly allocated block:
     STACK(size - 1)  start offset of the range (cc - common->start)
     STACK(size - 2)  type_then_trap
     STACK(size - 3)  previous control head value */
compile_then_trap_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)11824 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
11825 {
11826 DEFINE_COMPILER;
11827 backtrack_common *backtrack;
11828 BOOL needs_control_head;
11829 int size;
11830 
11831 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
      /* Make the new record the current then-trap; the caller saves and later
         restores the previous value of common->then_trap. */
11832 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
11833 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
11834 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
11835 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
11836 
      /* Three bookkeeping slots plus the frame, when framesize is not negative. */
11837 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
11838 size = 3 + (size < 0 ? 0 : size);
11839 
      /* TMP2 keeps the previous control head while the new block is set up. */
11840 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11841 allocate_stack(common, size);
      /* The new control head is STACK_TOP advanced by (size - 3) words, which
         is STACK_TOP itself when there is no frame. */
11842 if (size > 3)
11843   OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
11844 else
11845   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
11846 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
11847 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
11848 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
11849 
      /* Initialise the frame only when get_framesize() returned a
         non-negative size. */
11850 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
11851 if (size >= 0)
11852   init_frame(common, cc, ccend, size - 1, 0);
11853 }
11854 
/* Emits the matching-path code for every opcode in [cc, ccend) by dispatching
   each opcode to its dedicated emitter.

   common  compiler state
   cc      first opcode to compile
   ccend   end of the range; asserted to be OP_END or an opcode in the
           OP_ALT..OP_KETRPOS range
   parent  backtrack record that receives the backtrack entries created here

   Many emitters take a jump list for "no match" jumps. The expression
     parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks
   selects the nextbacktracks list of the most recently pushed backtrack
   entry, or the parent's own topbacktracks list when nothing has been pushed
   yet. It is evaluated at each use, after any earlier entry was pushed.

   The function returns early when a helper returns NULL (presumably a
   compiler error - the helpers' bodies are not visible here). */
compile_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)11855 static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
11856 {
11857 DEFINE_COMPILER;
11858 backtrack_common *backtrack;
11859 BOOL has_then_trap = FALSE;
11860 then_trap_backtrack *save_then_trap = NULL;
11861 
11862 SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
11863 
      /* A non-zero then_offsets entry for this position means a then-trap
         must be installed before the range; the previous trap is remembered
         so that it can be restored at the end. */
11864 if (common->has_then && common->then_offsets[cc - common->start] != 0)
11865   {
11866   SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
11867   has_then_trap = TRUE;
11868   save_then_trap = common->then_trap;
11869   /* Tail item on backtrack. */
11870   compile_then_trap_matchingpath(common, cc, ccend, parent);
11871   }
11872 
11873 while (cc < ccend)
11874   {
11875   switch(*cc)
11876     {
          /* Zero-width assertions handled by the simple assertion emitter. */
11877     case OP_SOD:
11878     case OP_SOM:
11879     case OP_NOT_WORD_BOUNDARY:
11880     case OP_WORD_BOUNDARY:
11881     case OP_EODN:
11882     case OP_EOD:
11883     case OP_DOLL:
11884     case OP_DOLLM:
11885     case OP_CIRC:
11886     case OP_CIRCM:
11887     case OP_REVERSE:
11888     cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
11889     break;
11890 
          /* Single-character type matches. */
11891     case OP_NOT_DIGIT:
11892     case OP_DIGIT:
11893     case OP_NOT_WHITESPACE:
11894     case OP_WHITESPACE:
11895     case OP_NOT_WORDCHAR:
11896     case OP_WORDCHAR:
11897     case OP_ANY:
11898     case OP_ALLANY:
11899     case OP_ANYBYTE:
11900     case OP_NOTPROP:
11901     case OP_PROP:
11902     case OP_ANYNL:
11903     case OP_NOT_HSPACE:
11904     case OP_HSPACE:
11905     case OP_NOT_VSPACE:
11906     case OP_VSPACE:
11907     case OP_EXTUNI:
11908     case OP_NOT:
11909     case OP_NOTI:
11910     cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
11911     break;
11912 
          /* OP_SET_SOM: the old OVECTOR(0) value is saved on the backtrack
             stack and OVECTOR(0) is replaced by the current position. */
11913     case OP_SET_SOM:
11914     PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
11915     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
11916     allocate_stack(common, 1);
11917     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
11918     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11919     cc++;
11920     break;
11921 
          /* Literal characters: the multi-character emitter is used only in
             PCRE2_JIT_COMPLETE mode; other modes go one character at a time. */
11922     case OP_CHAR:
11923     case OP_CHARI:
11924     if (common->mode == PCRE2_JIT_COMPLETE)
11925       cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
11926     else
11927       cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
11928     break;
11929 
          /* Repeated single characters and character types. */
11930     case OP_STAR:
11931     case OP_MINSTAR:
11932     case OP_PLUS:
11933     case OP_MINPLUS:
11934     case OP_QUERY:
11935     case OP_MINQUERY:
11936     case OP_UPTO:
11937     case OP_MINUPTO:
11938     case OP_EXACT:
11939     case OP_POSSTAR:
11940     case OP_POSPLUS:
11941     case OP_POSQUERY:
11942     case OP_POSUPTO:
11943     case OP_STARI:
11944     case OP_MINSTARI:
11945     case OP_PLUSI:
11946     case OP_MINPLUSI:
11947     case OP_QUERYI:
11948     case OP_MINQUERYI:
11949     case OP_UPTOI:
11950     case OP_MINUPTOI:
11951     case OP_EXACTI:
11952     case OP_POSSTARI:
11953     case OP_POSPLUSI:
11954     case OP_POSQUERYI:
11955     case OP_POSUPTOI:
11956     case OP_NOTSTAR:
11957     case OP_NOTMINSTAR:
11958     case OP_NOTPLUS:
11959     case OP_NOTMINPLUS:
11960     case OP_NOTQUERY:
11961     case OP_NOTMINQUERY:
11962     case OP_NOTUPTO:
11963     case OP_NOTMINUPTO:
11964     case OP_NOTEXACT:
11965     case OP_NOTPOSSTAR:
11966     case OP_NOTPOSPLUS:
11967     case OP_NOTPOSQUERY:
11968     case OP_NOTPOSUPTO:
11969     case OP_NOTSTARI:
11970     case OP_NOTMINSTARI:
11971     case OP_NOTPLUSI:
11972     case OP_NOTMINPLUSI:
11973     case OP_NOTQUERYI:
11974     case OP_NOTMINQUERYI:
11975     case OP_NOTUPTOI:
11976     case OP_NOTMINUPTOI:
11977     case OP_NOTEXACTI:
11978     case OP_NOTPOSSTARI:
11979     case OP_NOTPOSPLUSI:
11980     case OP_NOTPOSQUERYI:
11981     case OP_NOTPOSUPTOI:
11982     case OP_TYPESTAR:
11983     case OP_TYPEMINSTAR:
11984     case OP_TYPEPLUS:
11985     case OP_TYPEMINPLUS:
11986     case OP_TYPEQUERY:
11987     case OP_TYPEMINQUERY:
11988     case OP_TYPEUPTO:
11989     case OP_TYPEMINUPTO:
11990     case OP_TYPEEXACT:
11991     case OP_TYPEPOSSTAR:
11992     case OP_TYPEPOSPLUS:
11993     case OP_TYPEPOSQUERY:
11994     case OP_TYPEPOSUPTO:
11995     cc = compile_iterator_matchingpath(common, cc, parent);
11996     break;
11997 
          /* Character classes: the opcode that follows the 32-byte class data
             decides between the iterator emitter (a repeat in the
             OP_CRSTAR..OP_CRPOSRANGE range) and a single match. */
11998     case OP_CLASS:
11999     case OP_NCLASS:
12000     if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
12001       cc = compile_iterator_matchingpath(common, cc, parent);
12002     else
12003       cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
12004     break;
12005 
12006 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
          /* Extended classes: GET(cc, 1) gives the offset of the opcode that
             follows the class; the same repeat test is applied to it. */
12007     case OP_XCLASS:
12008     if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
12009       cc = compile_iterator_matchingpath(common, cc, parent);
12010     else
12011       cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
12012     break;
12013 #endif
12014 
          /* Back references by number, with or without a following repeat. */
12015     case OP_REF:
12016     case OP_REFI:
12017     if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
12018       cc = compile_ref_iterator_matchingpath(common, cc, parent);
12019     else
12020       {
12021       compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
12022       cc += 1 + IMM2_SIZE;
12023       }
12024     break;
12025 
          /* OP_DNREF/OP_DNREFI carry two immediates; without a repeat,
             compile_dnref_search() is emitted before the reference match. */
12026     case OP_DNREF:
12027     case OP_DNREFI:
12028     if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
12029       cc = compile_ref_iterator_matchingpath(common, cc, parent);
12030     else
12031       {
12032       compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
12033       compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
12034       cc += 1 + 2 * IMM2_SIZE;
12035       }
12036     break;
12037 
12038     case OP_RECURSE:
12039     cc = compile_recurse_matchingpath(common, cc, parent);
12040     break;
12041 
12042     case OP_CALLOUT:
12043     case OP_CALLOUT_STR:
12044     cc = compile_callout_matchingpath(common, cc, parent);
12045     break;
12046 
          /* Assertions get an assert_backtrack record before being compiled. */
12047     case OP_ASSERT:
12048     case OP_ASSERT_NOT:
12049     case OP_ASSERTBACK:
12050     case OP_ASSERTBACK_NOT:
12051     PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12052     cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12053     break;
12054 
          /* OP_BRAMINZERO: the bracket is skipped on the matching path
             (cc moves to its end). One stack slot holding STR_PTR is pushed,
             or two slots (0 and STR_PTR) when the bracket ends with
             OP_KETRMIN. */
12055     case OP_BRAMINZERO:
12056     PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
12057     cc = bracketend(cc + 1);
12058     if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
12059       {
12060       allocate_stack(common, 1);
12061       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
12062       }
12063     else
12064       {
12065       allocate_stack(common, 2);
12066       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12067       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
12068       }
12069     BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
12070     count_match(common);
12071     break;
12072 
12073     case OP_ASSERT_NA:
12074     case OP_ASSERTBACK_NA:
12075     case OP_ONCE:
12076     case OP_SCRIPT_RUN:
12077     case OP_BRA:
12078     case OP_CBRA:
12079     case OP_COND:
12080     case OP_SBRA:
12081     case OP_SCBRA:
12082     case OP_SCOND:
12083     cc = compile_bracket_matchingpath(common, cc, parent);
12084     break;
12085 
          /* OP_BRAZERO: an opcode above OP_ASSERTBACK_NOT after it is
             compiled as a bracket, anything else as an assertion. */
12086     case OP_BRAZERO:
12087     if (cc[1] > OP_ASSERTBACK_NOT)
12088       cc = compile_bracket_matchingpath(common, cc, parent);
12089     else
12090       {
12091       PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12092       cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12093       }
12094     break;
12095 
12096     case OP_BRAPOS:
12097     case OP_CBRAPOS:
12098     case OP_SBRAPOS:
12099     case OP_SCBRAPOS:
12100     case OP_BRAPOSZERO:
12101     cc = compile_bracketpos_matchingpath(common, cc, parent);
12102     break;
12103 
          /* OP_MARK: the previous mark is saved on the backtrack stack and the
             new mark name address (cc + 2) is stored in the local mark slot
             and in jit_arguments.mark_ptr. When has_skip_arg is set, five
             slots are used and a type_mark entry (name address, STR_PTR and
             previous control head) is linked into the control head chain. */
12104     case OP_MARK:
12105     PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
12106     SLJIT_ASSERT(common->mark_ptr != 0);
12107     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
12108     allocate_stack(common, common->has_skip_arg ? 5 : 1);
12109     if (HAS_VIRTUAL_REGISTERS)
12110       OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12111     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
12112     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
12113     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
12114     OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
12115     if (common->has_skip_arg)
12116       {
12117       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12118       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
12119       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
12120       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
12121       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
12122       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
12123       }
12124     cc += 1 + 2 + cc[1];
12125     break;
12126 
12127     case OP_PRUNE:
12128     case OP_PRUNE_ARG:
12129     case OP_SKIP:
12130     case OP_SKIP_ARG:
12131     case OP_THEN:
12132     case OP_THEN_ARG:
12133     case OP_COMMIT:
12134     case OP_COMMIT_ARG:
12135     cc = compile_control_verb_matchingpath(common, cc, parent);
12136     break;
12137 
12138     case OP_FAIL:
12139     case OP_ACCEPT:
12140     case OP_ASSERT_ACCEPT:
12141     cc = compile_fail_accept_matchingpath(common, cc, parent);
12142     break;
12143 
12144     case OP_CLOSE:
12145     cc = compile_close_matchingpath(common, cc);
12146     break;
12147 
          /* OP_SKIPZERO: no code is emitted; the following bracket is skipped. */
12148     case OP_SKIPZERO:
12149     cc = bracketend(cc + 1);
12150     break;
12151 
12152     default:
12153     SLJIT_UNREACHABLE();
12154     return;
12155     }
        /* A NULL result from an emitter stops compilation of this range. */
12156   if (cc == NULL)
12157     return;
12158   }
12159 
      /* Close the then-trap opened at the top: push a second record that
         points back at the trap record and restore the outer trap. */
12160 if (has_then_trap)
12161   {
12162   /* Head item on backtrack. */
12163   PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
12164   BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
12165   BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
12166   common->then_trap = save_then_trap;
12167   }
12168 SLJIT_ASSERT(cc == ccend);
12169 }
12170 
/* The helper macros used by the matching-path emitters are removed here;
   the backtracking-path emitters below use the two macros defined next. */
12171 #undef PUSH_BACKTRACK
12172 #undef PUSH_BACKTRACK_NOVALUE
12173 #undef BACKTRACK_AS
12174 
/* Compiles the backtracking path of the given record and returns from the
   enclosing function (which must return void) if the compiler reports an
   error. Expects "common" and "compiler" to be in scope at the point of use. */
12175 #define COMPILE_BACKTRACKINGPATH(current) \
12176   do \
12177     { \
12178     compile_backtrackingpath(common, (current)); \
12179     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
12180       return; \
12181     } \
12182   while (0)
12183 
/* Casts the variable named "current" to a pointer to the given record type. */
12184 #define CURRENT_AS(type) ((type *)current)
12185 
/* Emits the backtracking-path code for a single-character iterator.

   common   compiler state
   current  backtrack record of the iterator; accessed as a
            char_iterator_backtrack through CURRENT_AS()

   The iterator state is addressed through base/offset0/offset1. When the
   iterator has no private data (PRIVATE_DATA(cc) == 0) the state lives in
   the top slots of the backtrack stack, which are freed once the iterator
   is exhausted; otherwise it lives at private_data_ptr relative to SLJIT_SP
   and nothing is freed. The jumps queued on current->topbacktracks are
   bound to the end of the emitted code. */
compile_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)12186 static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12187 {
12188 DEFINE_COMPILER;
12189 PCRE2_SPTR cc = current->cc;
12190 PCRE2_UCHAR opcode;
12191 PCRE2_UCHAR type;
12192 sljit_u32 max = 0, exact;
12193 struct sljit_label *label = NULL;
12194 struct sljit_jump *jump = NULL;
12195 jump_list *jumplist = NULL;
12196 PCRE2_SPTR end;
12197 int private_data_ptr = PRIVATE_DATA(cc);
12198 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
12199 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
12200 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
12201 
12202 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
12203 
12204 switch(opcode)
12205   {
12206   case OP_STAR:
12207   case OP_UPTO:
12208   if (type == OP_ANYNL || type == OP_EXTUNI)
12209     {
          /* One saved position is popped per backtrack; a non-zero value
             resumes the matching path at that position. */
12210     SLJIT_ASSERT(private_data_ptr == 0);
12211     set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
12212     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12213     free_stack(common, 1);
12214     CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12215     }
12216   else
12217     {
12218     if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
12219       {
            /* Scan backwards from the saved position (offset0) towards the
               limit stored at offset1, resuming the matching path as soon as
               the code unit before STR_PTR equals charpos.chr (after OR-ing
               in charpos.othercasebit when it is non-zero). */
12220       OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12221       OP1(SLJIT_MOV, TMP2, 0, base, offset1);
12222       OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12223 
12224       jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
12225       label = LABEL();
12226       OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
12227       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12228       if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
12229         OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
12230       CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12231       move_back(common, NULL, TRUE);
12232       CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
12233       }
12234     else
12235       {
            /* Give back one position per backtrack: stop when the saved
               position is not above the limit at offset1; otherwise call
               move_back() (presumably steps STR_PTR back by one character -
               confirm), save the new position and resume the matching path. */
12236       OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12237       jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
12238       move_back(common, NULL, TRUE);
12239       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12240       JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12241       }
          /* Exhausted: release the two stack slots if the stack was used. */
12242     JUMPHERE(jump);
12243     if (private_data_ptr == 0)
12244       free_stack(common, 2);
12245     }
12246   break;
12247 
        /* OP_MINSTAR: restore the saved position, try to match one more
           character, save the new position and resume the matching path.
           A failed character match ends the iterator. */
12248   case OP_MINSTAR:
12249   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12250   compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12251   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12252   JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12253   set_jumps(jumplist, LABEL());
12254   if (private_data_ptr == 0)
12255     free_stack(common, 1);
12256   break;
12257 
        /* OP_MINUPTO: like OP_MINSTAR, but the counter at offset1 is
           decremented first and the iterator ends when it reaches zero. */
12258   case OP_MINUPTO:
12259   OP1(SLJIT_MOV, TMP1, 0, base, offset1);
12260   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12261   OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
12262   add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));
12263 
12264   OP1(SLJIT_MOV, base, offset1, TMP1, 0);
12265   compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12266   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12267   JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12268 
12269   set_jumps(jumplist, LABEL());
12270   if (private_data_ptr == 0)
12271     free_stack(common, 2);
12272   break;
12273 
        /* OP_QUERY: reload the saved position and clear the slot. A non-zero
           value resumes the matching path; zero means the alternative was
           already used. The u.backtracks jumps enter at the second sequence,
           which reloads the position, clears the slot and always resumes. */
12274   case OP_QUERY:
12275   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12276   OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12277   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12278   jump = JUMP(SLJIT_JUMP);
12279   set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
12280   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12281   OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12282   JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12283   JUMPHERE(jump);
12284   if (private_data_ptr == 0)
12285     free_stack(common, 1);
12286   break;
12287 
        /* OP_MINQUERY: reload the saved position and clear the slot; if the
           value was non-zero, try to match the character now and resume the
           matching path on success. */
12288   case OP_MINQUERY:
12289   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12290   OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12291   jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
12292   compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12293   JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12294   set_jumps(jumplist, LABEL());
12295   JUMPHERE(jump);
12296   if (private_data_ptr == 0)
12297     free_stack(common, 1);
12298   break;
12299 
        /* These opcodes emit no backtracking code of their own. */
12300   case OP_EXACT:
12301   case OP_POSSTAR:
12302   case OP_POSQUERY:
12303   case OP_POSUPTO:
12304   break;
12305 
12306   default:
12307   SLJIT_UNREACHABLE();
12308   break;
12309   }
12310 
12311 set_jumps(current->topbacktracks, LABEL());
12312 }
12313 
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)12314 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12315 {
12316 DEFINE_COMPILER;
12317 PCRE2_SPTR cc = current->cc;
12318 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
12319 PCRE2_UCHAR type;
12320 
12321 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
12322 
12323 if ((type & 0x1) == 0)
12324   {
12325   /* Maximize case. */
12326   set_jumps(current->topbacktracks, LABEL());
12327   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12328   free_stack(common, 1);
12329   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12330   return;
12331   }
12332 
12333 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12334 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12335 set_jumps(current->topbacktracks, LABEL());
12336 free_stack(common, ref ? 2 : 3);
12337 }
12338 
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)12339 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12340 {
12341 DEFINE_COMPILER;
12342 recurse_entry *entry;
12343 
12344 if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
12345   {
12346   entry = CURRENT_AS(recurse_backtrack)->entry;
12347   if (entry->backtrack_label == NULL)
12348     add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
12349   else
12350     JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
12351   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
12352   }
12353 else
12354   compile_backtrackingpath(common, current->top);
12355 
12356 set_jumps(current->topbacktracks, LABEL());
12357 }
12358 
/* Emits the backtracking-path code for an assertion, optionally preceded by
   OP_BRAZERO.

   common   compiler state
   current  backtrack record of the assertion; accessed as an
            assert_backtrack through CURRENT_AS()

   With OP_BRAZERO one stack slot holds a saved subject position. A non-zero
   value means the matching path can be resumed once more, with the slot
   cleared to zero; zero means that alternative has already been used. */
compile_assert_backtrackingpath(compiler_common * common,struct backtrack_common * current)12359 static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12360 {
12361 DEFINE_COMPILER;
12362 PCRE2_SPTR cc = current->cc;
12363 PCRE2_UCHAR bra = OP_BRA;
12364 struct sljit_jump *brajump = NULL;
12365 
      /* bra stays OP_BRA unless the assertion is wrapped in OP_BRAZERO, in
         which case cc is advanced to the assertion opcode itself. */
12366 SLJIT_ASSERT(*cc != OP_BRAMINZERO);
12367 if (*cc == OP_BRAZERO)
12368   {
12369   bra = *cc;
12370   cc++;
12371   }
12372 
12373 if (bra == OP_BRAZERO)
12374   {
12375   SLJIT_ASSERT(current->topbacktracks == NULL);
12376   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12377   }
12378 
      /* Negative framesize: no frame has to be reverted. */
12379 if (CURRENT_AS(assert_backtrack)->framesize < 0)
12380   {
12381   set_jumps(current->topbacktracks, LABEL());
12382 
12383   if (bra == OP_BRAZERO)
12384     {
12385     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12386     CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
12387     free_stack(common, 1);
12388     }
12389   return;
12390   }
12391 
12392 if (bra == OP_BRAZERO)
12393   {
        /* Negative assertions under OP_BRAZERO need no frame handling here. */
12394   if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
12395     {
12396     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12397     CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
12398     free_stack(common, 1);
12399     return;
12400     }
        /* Positive assertion: release the slot and skip everything below
           when the saved position was zero. */
12401   free_stack(common, 1);
12402   brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
12403   }
12404 
12405 if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
12406   {
        /* Positive assertion with a frame: STACK_TOP is reloaded from the
           assertion's private data slot, the common revertframes routine is
           called, the private data slot is reloaded from STACK(-2) and
           STACK_TOP is advanced by (framesize - 1) words. */
12407   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
12408   add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12409   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12410   OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(assert_backtrack)->framesize - 1) * sizeof(sljit_sw));
12411   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, TMP1, 0);
12412 
12413   set_jumps(current->topbacktracks, LABEL());
12414   }
12415 else
12416   set_jumps(current->topbacktracks, LABEL());
12417 
12418 if (bra == OP_BRAZERO)
12419   {
12420   /* We know there is enough place on the stack. */
        /* Reserve the slot again by adjusting STACK_TOP directly, mark it as
           used (zero) and resume the matching path; brajump lands after
           this sequence. */
12421   OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
12422   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12423   JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
12424   JUMPHERE(brajump);
12425   }
12426 }
12427 
/* Emits the backtracking path of a bracket group. compile_backtrackingpath
sends OP_ASSERT_NA, OP_ASSERTBACK_NA, OP_ONCE, OP_SCRIPT_RUN, OP_BRA, OP_CBRA,
OP_COND, OP_SBRA, OP_SCBRA, OP_SCOND and OP_BRAZERO (when it is not followed
by an assertion) here; compile_braminzero_backtrackingpath calls it as well.

Arguments:
  common    the JIT compiler state
  current   the backtrack record, accessed as a bracket_backtrack

The generated code pops the values found at STACK_TOP, then for every
remaining alternative compiles its matching path (compile_matchingpath)
followed by its backtracking path, and finally emits the repeat / zero-repeat
handling selected by the closing ket and the optional OP_BRAZERO /
OP_BRAMINZERO prefix. The function returns early when sljit reports a
compiler error. Side effect: for OP_ONCE the framesize stored in the
backtrack record is shifted right by one (see below). */
compile_bracket_backtrackingpath(compiler_common * common,struct backtrack_common * current)12428 static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12429 {
12430 DEFINE_COMPILER;
12431 int opcode, stacksize, alt_count, alt_max;
12432 int offset = 0;
12433 int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
12434 int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
12435 PCRE2_SPTR cc = current->cc;
12436 PCRE2_SPTR ccbegin;
12437 PCRE2_SPTR ccprev;
12438 PCRE2_UCHAR bra = OP_BRA;
12439 PCRE2_UCHAR ket;
12440 assert_backtrack *assert;
12441 BOOL has_alternatives;
12442 BOOL needs_control_head = FALSE;
12443 struct sljit_jump *brazero = NULL;
12444 struct sljit_jump *next_alt = NULL;
12445 struct sljit_jump *once = NULL;
12446 struct sljit_jump *cond = NULL;
12447 struct sljit_label *rmin_label = NULL;
12448 struct sljit_label *exact_label = NULL;
12449 struct sljit_put_label *put_label = NULL;
12450 
      /* Skip an optional zero-repeat prefix, remembering which one it was. */
12451 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
12452   {
12453   bra = *cc;
12454   cc++;
12455   }
12456 
12457 opcode = *cc;
      /* ccbegin temporarily points at the closing ket of the group, so that its
      opcode and the private data attached to it can be read. */
12458 ccbegin = bracketend(cc) - 1 - LINK_SIZE;
12459 ket = *ccbegin;
      /* A plain OP_KET with private data describes a counted repeat: the slot of
      its counter, its type and its count. OP_UPTO and OP_MINUPTO repeats are
      handled below like OP_KETRMAX and OP_KETRMIN loops. */
12460 if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
12461   {
12462   repeat_ptr = PRIVATE_DATA(ccbegin);
12463   repeat_type = PRIVATE_DATA(ccbegin + 2);
12464   repeat_count = PRIVATE_DATA(ccbegin + 3);
12465   SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
12466   if (repeat_type == OP_UPTO)
12467     ket = OP_KETRMAX;
12468   if (repeat_type == OP_MINUPTO)
12469     ket = OP_KETRMIN;
12470   }
      /* From here on ccbegin is the opening opcode of the group and cc is the
      opcode that ends its first branch (OP_ALT or a ket). */
12471 ccbegin = cc;
12472 cc += GET(cc, 1);
12473 has_alternatives = *cc == OP_ALT;
      /* For conditional groups has_alternatives is instead true when the
      condition is an assertion or when condfailed jumps were recorded. */
12474 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12475   has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
      /* Capturing groups: offset is twice the 2-byte value stored after the link;
      it indexes OVECTOR() and (halved) optimized_cbracket[] below. */
12476 if (opcode == OP_CBRA || opcode == OP_SCBRA)
12477   offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
      /* A repeated OP_COND is treated as OP_SCOND from here on. */
12478 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
12479   opcode = OP_SCOND;
12480 
12481 alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
12482 
12483 /* Decoding the needs_control_head in framesize. */
      /* The lowest bit is the flag; the remaining bits are the real framesize,
      which is written back to the backtrack record. */
12484 if (opcode == OP_ONCE)
12485   {
12486   needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
12487   CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
12488   }
12489 
      /* Bounded repeat: pop one slot into TMP1 and store it (plus one for
      OP_UPTO) into the repeat counter slot. */
12490 if (ket != OP_KET && repeat_type != 0)
12491   {
12492   /* TMP1 is used in OP_KETRMIN below. */
12493   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12494   free_stack(common, 1);
12495   if (repeat_type == OP_UPTO)
12496     OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
12497   else
12498     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
12499   }
12500 
12501 if (ket == OP_KETRMAX)
12502   {
12503   if (bra == OP_BRAZERO)
12504     {
          /* brazero is taken when the popped slot is zero; it is bound near
          the end of this function. */
12505     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12506     free_stack(common, 1);
12507     brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12508     }
12509   }
12510 else if (ket == OP_KETRMIN)
12511   {
12512   if (bra != OP_BRAMINZERO)
12513     {
12514     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12515     if (repeat_type != 0)
12516       {
12517       /* TMP1 was set a few lines above. */
12518       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12519       /* Drop STR_PTR for non-greedy plus quantifier. */
12520       if (opcode != OP_ONCE)
12521         free_stack(common, 1);
12522       }
12523     else if (opcode >= OP_SBRA || opcode == OP_ONCE)
12524       {
12525       /* Checking zero-length iteration. */
            /* Another iteration is tried only when STR_PTR differs from the
            saved start value (read from the private data slot, or from the
            saved frame when OP_ONCE has one). */
12526       if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
12527         CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12528       else
12529         {
12530         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12531         CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12532         }
12533       /* Drop STR_PTR for non-greedy plus quantifier. */
12534       if (opcode != OP_ONCE)
12535         free_stack(common, 1);
12536       }
12537     else
12538       JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12539     }
        /* Target of the OP_KETRMIN jump emitted at the end of this function. */
12540   rmin_label = LABEL();
12541   if (repeat_type != 0)
12542     OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12543   }
12544 else if (bra == OP_BRAZERO)
12545   {
        /* Non-repeated group: here brazero is taken when the popped slot is
        non-zero. */
12546   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12547   free_stack(common, 1);
12548   brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12549   }
12550 else if (repeat_type == OP_EXACT)
12551   {
        /* Reset the counter to 1; exact_label is the target of the OP_EXACT
        loop check emitted at the end of this function. */
12552   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12553   exact_label = LABEL();
12554   }
12555 
      /* Restore capture state from the stack. With capture_last_ptr three slots
      are popped (capture_last, then both OVECTOR entries); otherwise two slots
      are popped for a capture bracket that is not marked as optimized. */
12556 if (offset != 0)
12557   {
12558   if (common->capture_last_ptr != 0)
12559     {
12560     SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
12561     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12562     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12563     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12564     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12565     free_stack(common, 3);
12566     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
12567     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
12568     }
12569   else if (common->optimized_cbracket[offset >> 1] == 0)
12570     {
12571     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12572     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12573     free_stack(common, 2);
12574     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12575     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12576     }
12577   }
12578 
12579 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
12580   {
        /* With a saved frame: reload STACK_TOP from the private data slot and
        call the shared revertframes routine. */
12581   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12582     {
12583     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12584     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12585     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
12586     }
        /* Unconditional jump, bound by JUMPHERE(once) in the final OP_ONCE
        section. */
12587   once = JUMP(SLJIT_JUMP);
12588   }
12589 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12590   {
12591   if (has_alternatives)
12592     {
12593     /* Always exactly one alternative. */
12594     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12595     free_stack(common, 1);
12596 
12597     alt_max = 2;
12598     next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12599     }
12600   }
12601 else if (has_alternatives)
12602   {
        /* The popped slot selects where backtracking continues. With more than
        three alternatives it is used as the target of an indirect jump (the
        addresses are filled in through put labels); otherwise it is compared
        against small constants (0 here, 1 in the loop below). */
12603   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12604   free_stack(common, 1);
12605 
12606   if (alt_max > 3)
12607     {
12608     sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
12609 
12610     SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_put_label);
12611     sljit_set_put_label(CURRENT_AS(bracket_backtrack)->u.matching_put_label, LABEL());
12612     sljit_emit_op0(compiler, SLJIT_ENDBR);
12613     }
12614   else
12615     next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12616   }
12617 
      /* Backtracking code for the items currently recorded under current->top. */
12618 COMPILE_BACKTRACKINGPATH(current->top);
12619 if (current->topbacktracks)
12620   set_jumps(current->topbacktracks, LABEL());
12621 
12622 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12623   {
12624   /* Conditional block always has at most one alternative. */
12625   if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
12626     {
12627     SLJIT_ASSERT(has_alternatives);
12628     assert = CURRENT_AS(bracket_backtrack)->u.assert;
          /* A positive assertion condition with a saved frame: revert the
          frame and restore the assertion's private data slot. */
12629     if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
12630       {
12631       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
12632       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12633       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12634       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
12635       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
12636       }
          /* cond jumps over the alternative compiled below (bound after the
          loop); the condfailed jumps land right here instead. */
12637     cond = JUMP(SLJIT_JUMP);
12638     set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
12639     }
12640   else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
12641     {
12642     SLJIT_ASSERT(has_alternatives);
12643     cond = JUMP(SLJIT_JUMP);
12644     set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
12645     }
12646   else
12647     SLJIT_ASSERT(!has_alternatives);
12648   }
12649 
      /* Each iteration handles one remaining alternative: its matching path, the
      bookkeeping pushed after a successful match, a jump to
      alternative_matchingpath, and then the backtracking path of that
      alternative. */
12650 if (has_alternatives)
12651   {
12652   alt_count = 1;
12653   do
12654     {
12655     current->top = NULL;
12656     current->topbacktracks = NULL;
12657     current->nextbacktracks = NULL;
12658     /* Conditional blocks always have an additional alternative, even if it is empty. */
12659     if (*cc == OP_ALT)
12660       {
12661       ccprev = cc + 1 + LINK_SIZE;
12662       cc += GET(cc, 1);
            /* Reload STR_PTR before trying the next alternative (not done for
            conditional groups). */
12663       if (opcode != OP_COND && opcode != OP_SCOND)
12664         {
12665         if (opcode != OP_ONCE)
12666           {
12667           if (private_data_ptr != 0)
12668             OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12669           else
12670             OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12671           }
12672         else
12673           OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
12674         }
12675       compile_matchingpath(common, ccprev, cc, current);
12676       if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
12677         return;
12678 
12679       if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
12680         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12681 
12682       if (opcode == OP_SCRIPT_RUN)
12683         match_script_run_common(common, private_data_ptr, current);
12684       }
12685 
12686     /* Instructions after the current alternative is successfully matched. */
12687     /* There is a similar code in compile_bracket_matchingpath. */
12688     if (opcode == OP_ONCE)
12689       match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
12690 
          /* First pass: count the stack slots needed. */
12691     stacksize = 0;
12692     if (repeat_type == OP_MINUPTO)
12693       {
12694       /* We need to preserve the counter. TMP2 will be used below. */
12695       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
12696       stacksize++;
12697       }
12698     if (ket != OP_KET || bra != OP_BRA)
12699       stacksize++;
12700     if (offset != 0)
12701       {
12702       if (common->capture_last_ptr != 0)
12703         stacksize++;
12704       if (common->optimized_cbracket[offset >> 1] == 0)
12705         stacksize += 2;
12706       }
12707     if (opcode != OP_ONCE)
12708       stacksize++;
12709 
12710     if (stacksize > 0)
12711       allocate_stack(common, stacksize);
12712 
          /* Second pass: fill the slots in the same order as they were counted. */
12713     stacksize = 0;
12714     if (repeat_type == OP_MINUPTO)
12715       {
12716       /* TMP2 was set above. */
12717       OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
12718       stacksize++;
12719       }
12720 
12721     if (ket != OP_KET || bra != OP_BRA)
12722       {
12723       if (ket != OP_KET)
12724         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
12725       else
12726         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
12727       stacksize++;
12728       }
12729 
12730     if (offset != 0)
12731       stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
12732 
          /* Record which alternative matched: a small index, or a code address
          written through a put label when there are more than three. */
12733     if (opcode != OP_ONCE)
12734       {
12735       if (alt_max <= 3)
12736         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
12737       else
12738         put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
12739       }
12740 
12741     if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
12742       {
12743       /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
12744       SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
12745       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
12746       }
12747 
12748     JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
12749 
          /* Entry point of this alternative's backtracking code: bind the
          pending next_alt jump (and, with three alternatives, emit the
          comparison against 1), or set the put label emitted above. */
12750     if (opcode != OP_ONCE)
12751       {
12752       if (alt_max <= 3)
12753         {
12754         JUMPHERE(next_alt);
12755         alt_count++;
12756         if (alt_count < alt_max)
12757           {
12758           SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
12759           next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
12760           }
12761         }
12762       else
12763         {
12764         sljit_set_put_label(put_label, LABEL());
12765         sljit_emit_op0(compiler, SLJIT_ENDBR);
12766         }
12767       }
12768 
12769     COMPILE_BACKTRACKINGPATH(current->top);
12770     if (current->topbacktracks)
12771       set_jumps(current->topbacktracks, LABEL());
12772     SLJIT_ASSERT(!current->nextbacktracks);
12773     }
12774   while (*cc == OP_ALT);
12775 
12776   if (cond != NULL)
12777     {
12778     SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
12779     assert = CURRENT_AS(bracket_backtrack)->u.assert;
          /* A negative assertion condition with a saved frame: revert the
          frame and restore the assertion's private data slot. */
12780     if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
12781       {
12782       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
12783       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12784       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12785       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
12786       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
12787       }
12788     JUMPHERE(cond);
12789     }
12790 
12791   /* Free the STR_PTR. */
12792   if (private_data_ptr == 0)
12793     free_stack(common, 1);
12794   }
12795 
      /* All alternatives are exhausted: restore what the group saved on entry. */
12796 if (offset != 0)
12797   {
12798   /* Using both tmp register is better for instruction scheduling. */
12799   if (common->optimized_cbracket[offset >> 1] != 0)
12800     {
12801     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12802     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12803     free_stack(common, 2);
12804     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12805     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12806     }
12807   else
12808     {
12809     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12810     free_stack(common, 1);
12811     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12812     }
12813   }
12814 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
12815   {
        /* Pop one slot back into the private data slot of the group. */
12816   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
12817   free_stack(common, 1);
12818   }
12819 else if (opcode == OP_ONCE)
12820   {
        /* cc is reset to the end of the first branch; stacksize counts the
        slots to release before the `once` jump target. */
12821   cc = ccbegin + GET(ccbegin, 1);
12822   stacksize = needs_control_head ? 1 : 0;
12823 
12824   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12825     {
12826     /* Reset head and drop saved frame. */
12827     stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
12828     }
12829   else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
12830     {
12831     /* The STR_PTR must be released. */
12832     stacksize++;
12833     }
12834 
12835   if (stacksize > 0)
12836     free_stack(common, stacksize);
12837 
12838   JUMPHERE(once);
12839   /* Restore previous private_data_ptr */
12840   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12841     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
12842   else if (ket == OP_KETRMIN)
12843     {
12844     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12845     /* See the comment below. */
12846     free_stack(common, 2);
12847     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12848     }
12849   }
12850 
      /* Repeat handling selected by the (possibly remapped) ket and the prefix. */
12851 if (repeat_type == OP_EXACT)
12852   {
        /* Increment the counter and loop back to exact_label while it is not
        greater than repeat_count. */
12853   OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12854   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
12855   CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
12856   }
12857 else if (ket == OP_KETRMAX)
12858   {
        /* Reload STR_PTR from the stack; a non-zero value continues at
        recursive_matchingpath. */
12859   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12860   if (bra != OP_BRAZERO)
12861     free_stack(common, 1);
12862 
12863   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12864   if (bra == OP_BRAZERO)
12865     {
12866     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12867     JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
12868     JUMPHERE(brazero);
12869     free_stack(common, 1);
12870     }
12871   }
12872 else if (ket == OP_KETRMIN)
12873   {
12874   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12875 
12876   /* OP_ONCE removes everything in case of a backtrack, so we don't
12877   need to explicitly release the STR_PTR. The extra release would
12878   affect badly the free_stack(2) above. */
12879   if (opcode != OP_ONCE)
12880     free_stack(common, 1);
12881   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
12882   if (opcode == OP_ONCE)
12883     free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
12884   else if (bra == OP_BRAMINZERO)
12885     free_stack(common, 1);
12886   }
12887 else if (bra == OP_BRAZERO)
12888   {
        /* Non-repeated optional group: continue at zero_matchingpath with the
        STR_PTR reloaded from the stack; brazero lands after that jump. */
12889   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12890   JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
12891   JUMPHERE(brazero);
12892   }
12893 }
12894 
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)12895 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12896 {
12897 DEFINE_COMPILER;
12898 int offset;
12899 struct sljit_jump *jump;
12900 
12901 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
12902   {
12903   if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
12904     {
12905     offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
12906     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12907     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12908     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12909     if (common->capture_last_ptr != 0)
12910       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12911     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12912     if (common->capture_last_ptr != 0)
12913       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12914     }
12915   set_jumps(current->topbacktracks, LABEL());
12916   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
12917   return;
12918   }
12919 
12920 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
12921 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12922 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
12923 
12924 if (current->topbacktracks)
12925   {
12926   jump = JUMP(SLJIT_JUMP);
12927   set_jumps(current->topbacktracks, LABEL());
12928   /* Drop the stack frame. */
12929   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
12930   JUMPHERE(jump);
12931   }
12932 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
12933 }
12934 
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)12935 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12936 {
12937 assert_backtrack backtrack;
12938 
12939 current->top = NULL;
12940 current->topbacktracks = NULL;
12941 current->nextbacktracks = NULL;
12942 if (current->cc[1] > OP_ASSERTBACK_NOT)
12943   {
12944   /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
12945   compile_bracket_matchingpath(common, current->cc, current);
12946   compile_bracket_backtrackingpath(common, current->top);
12947   }
12948 else
12949   {
12950   memset(&backtrack, 0, sizeof(backtrack));
12951   backtrack.common.cc = current->cc;
12952   backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
12953   /* Manual call of compile_assert_matchingpath. */
12954   compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
12955   }
12956 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
12957 }
12958 
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)12959 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12960 {
12961 DEFINE_COMPILER;
12962 PCRE2_UCHAR opcode = *current->cc;
12963 struct sljit_label *loop;
12964 struct sljit_jump *jump;
12965 
12966 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
12967   {
12968   if (common->then_trap != NULL)
12969     {
12970     SLJIT_ASSERT(common->control_head_ptr != 0);
12971 
12972     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12973     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
12974     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
12975     jump = JUMP(SLJIT_JUMP);
12976 
12977     loop = LABEL();
12978     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12979     JUMPHERE(jump);
12980     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
12981     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
12982     add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
12983     return;
12984     }
12985   else if (!common->local_quit_available && common->in_positive_assertion)
12986     {
12987     add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
12988     return;
12989     }
12990   }
12991 
12992 if (common->local_quit_available)
12993   {
12994   /* Abort match with a fail. */
12995   if (common->quit_label == NULL)
12996     add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
12997   else
12998     JUMPTO(SLJIT_JUMP, common->quit_label);
12999   return;
13000   }
13001 
13002 if (opcode == OP_SKIP_ARG)
13003   {
13004   SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13005   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13006   OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
13007   sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
13008 
13009   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
13010   add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
13011   return;
13012   }
13013 
13014 if (opcode == OP_SKIP)
13015   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13016 else
13017   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
13018 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
13019 }
13020 
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)13021 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13022 {
13023 DEFINE_COMPILER;
13024 struct sljit_jump *jump;
13025 int size;
13026 
13027 if (CURRENT_AS(then_trap_backtrack)->then_trap)
13028   {
13029   common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
13030   return;
13031   }
13032 
13033 size = CURRENT_AS(then_trap_backtrack)->framesize;
13034 size = 3 + (size < 0 ? 0 : size);
13035 
13036 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
13037 free_stack(common, size);
13038 jump = JUMP(SLJIT_JUMP);
13039 
13040 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
13041 /* STACK_TOP is set by THEN. */
13042 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
13043   {
13044   add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13045   OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
13046   }
13047 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13048 free_stack(common, 3);
13049 
13050 JUMPHERE(jump);
13051 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
13052 }
13053 
/* Emits the backtracking code for a chain of backtrack records.

Arguments:
  common    the JIT compiler state
  current   the first record to process; the chain is followed through
            the prev links until it ends

For every record the pending nextbacktracks jumps are bound to a fresh
label, then the generator that belongs to the opcode at current->cc is
invoked. The active then trap of common is saved on entry and restored
before returning. */
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *saved_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->nextbacktracks != NULL)
    set_jumps(current->nextbacktracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Pop the saved value back into OVECTOR(0). */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF:
    case OP_REFI:
    case OP_DNREF:
    case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode after the prefix tells assertions and brackets apart. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    /* Restore the previous mark; when the pattern has a SKIP with argument,
    five slots are in use and the control head is restored as well. */
    if (common->has_skip_arg)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN:
    case OP_THEN_ARG:
    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT:
    case OP_COMMIT_ARG:
    /* Without a local quit the return register is set to
    PCRE2_ERROR_NOMATCH before jumping to the quit code. */
    if (!common->local_quit_available)
      {
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
      }
    if (common->quit_label != NULL)
      {
      JUMPTO(SLJIT_JUMP, common->quit_label);
      }
    else
      {
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
      }
    break;

    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    set_jumps(current->topbacktracks, LABEL());
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = saved_trap;
}
13244 
/* Emits the machine code for one recursively called group, namely the group
   described by common->currententry.

   Two entry points are generated and recorded in the entry:
     - entry_label:     start of the matching path of the group,
     - backtrack_label: re-entry point used to backtrack into the group.
   Both are entered through sljit_emit_fast_enter() and left through
   SLJIT_FAST_RETURN. TMP1 is loaded with 1 right before the return of the
   successful-match path and with 0 before every other return emitted here
   (presumably the match / no-match flag examined by the calling code -
   confirm against the callers).

   Arguments:
     common   shared compiler state; currententry must have no entry_label
              yet and recursive_head_ptr must be non-zero (both asserted).

   Returns nothing. If sljit reports a compiler error while an alternative
   is being compiled, the function returns immediately and leaves the rest
   of the code unemitted; the error is left for the caller to detect through
   sljit_get_compiler_error(). */
compile_recurse(compiler_common * common)13245 static SLJIT_INLINE void compile_recurse(compiler_common *common)
13246 {
13247 DEFINE_COMPILER;
      /* cc: opening bracket opcode of the group (see the assert below). */
13248 PCRE2_SPTR cc = common->start + common->currententry->start;
      /* ccbegin: first opcode after the bracket header. Every accepted opcode
         except OP_BRA carries an extra IMM2_SIZE field that is skipped too. */
13249 PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
      /* ccend: one opcode-plus-link before the end of the bracket (presumably
         the closing KET - confirm against bracketend()). */
13250 PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
13251 BOOL needs_control_head;
13252 BOOL has_quit;
13253 BOOL has_accept;
      /* Also fills in needs_control_head, has_quit and has_accept. */
13254 int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head, &has_quit, &has_accept);
13255 int alt_count, alt_max, local_size;
13256 backtrack_common altbacktrack;
      /* Jumps taken when an alternative has matched. */
13257 jump_list *match = NULL;
      /* Pending "not this alternative" jump of the compare chain (alt_max <= 3). */
13258 struct sljit_jump *next_alt = NULL;
      /* Taken at the backtrack entry when the -1 accept marker is found. */
13259 struct sljit_jump *accept_exit = NULL;
13260 struct sljit_label *quit;
      /* Resume address slot that is patched later (alt_max > 3). */
13261 struct sljit_put_label *put_label = NULL;
13262 
13263 /* Recurse captures then: start with no active then trap. */
13264 common->then_trap = NULL;
13265 
13266 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
13267 
13268 alt_max = no_alternatives(cc);
13269 alt_count = 0;
13270 
13271 /* Matching path. */
13272 SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
13273 common->currententry->entry_label = LABEL();
      /* Bind every call recorded so far for this entry to the new label. */
13274 set_jumps(common->currententry->entry_calls, common->currententry->entry_label);
13275 
      /* The return address of the fast call is received in TMP2. */
13276 sljit_emit_fast_enter(compiler, TMP2, 0);
13277 count_match(common);
13278 
      /* One local slot for the return address, plus one for the saved
         STR_PTR when there is more than one alternative. */
13279 local_size = (alt_max > 1) ? 2 : 1;
13280 
13281 /* (Reversed) stack layout:
13282    [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */
13283 
13284 allocate_stack(common, private_data_size + local_size);
13285 /* Save return address. */
13286 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);
13287 
13288 copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, has_quit);
13289 
13290 /* This variable is saved and restored every time we enter or exit a recursive context. */
13291 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
13292 
      /* The recursion starts with an empty (zero) control head. */
13293 if (needs_control_head)
13294   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13295 
      /* Remember the start position so later alternatives can restart from it. */
13296 if (alt_max > 1)
13297   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
13298 
      /* The recursion gets its own quit / accept labels and jump lists. */
13299 memset(&altbacktrack, 0, sizeof(backtrack_common));
13300 common->quit_label = NULL;
13301 common->accept_label = NULL;
13302 common->quit = NULL;
13303 common->accept = NULL;
13304 altbacktrack.cc = ccbegin;
      /* cc now marks the end of the first alternative. */
13305 cc += GET(cc, 1);
      /* One iteration per alternative: matching path first, then the code
         that selects and runs the backtracking path of that alternative. */
13306 while (1)
13307   {
13308   altbacktrack.top = NULL;
13309   altbacktrack.topbacktracks = NULL;
13310 
        /* Second and later alternatives restart from the saved position. */
13311   if (altbacktrack.cc != ccbegin)
13312     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13313 
13314   compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
13315   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13316     return;
13317 
        /* The alternative matched. Reserve the frame read back at the
           backtrack entry: slot 0 is written at the common match exit from
           TMP2 (loaded here with recursive_head_ptr); slot 1, when present,
           identifies this alternative. */
13318   allocate_stack(common, (alt_max > 1 || has_accept) ? 2 : 1);
13319   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13320 
13321   if (alt_max > 1 || has_accept)
13322     {
          /* More than three alternatives: store the address of this
             alternative's backtracking code (patched in once its label
             exists) for an indirect jump. Otherwise store the alternative's
             index, which the backtrack entry compares against 0 and 1. */
13323     if (alt_max > 3)
13324       put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
13325     else
13326       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
13327     }
13328 
13329   add_jump(compiler, &match, JUMP(SLJIT_JUMP));
13330 
13331   if (alt_count == 0)
13332     {
13333     /* Backtracking path entry. */
13334     SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
13335     common->currententry->backtrack_label = LABEL();
13336     set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);
13337 
          /* The return address of the backtracking call arrives in TMP1. */
13338     sljit_emit_fast_enter(compiler, TMP1, 0);
13339 
          /* A slot value of -1 is the marker written by the accept exit
             code emitted after the loop. */
13340     if (has_accept)
13341       accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
13342 
          /* TMP2 = the value stored in slot 0 at the match exit. */
13343     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13344     /* Save return address. */
13345     OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);
13346 
13347     copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
13348 
13349     if (alt_max > 1)
13350       {
            /* TMP1 = identifier of the alternative that matched
               (index or code address, see above). */
13351       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13352       free_stack(common, 2);
13353 
13354       if (alt_max > 3)
13355         {
              /* Jump to the stored address; the first alternative's
                 backtracking code starts right here. */
13356         sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
13357         sljit_set_put_label(put_label, LABEL());
13358         sljit_emit_op0(compiler, SLJIT_ENDBR);
13359         }
13360       else
              /* Falls through into the first alternative when TMP1 == 0. */
13361         next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13362       }
13363     else
13364       free_stack(common, has_accept ? 2 : 1);
13365     }
13366   else if (alt_max > 3)
13367     {
          /* Target of the indirect jump for this alternative. */
13368     sljit_set_put_label(put_label, LABEL());
13369     sljit_emit_op0(compiler, SLJIT_ENDBR);
13370     }
13371   else
13372     {
          /* Compare chain: the previous test lands here when it did not
             select its own alternative. Only the middle one of three
             alternatives needs a further test (against index 1). */
13373     JUMPHERE(next_alt);
13374     if (alt_count + 1 < alt_max)
13375       {
13376       SLJIT_ASSERT(alt_count == 1 && alt_max == 3);
13377       next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
13378       }
13379     }
13380 
13381   alt_count++;
13382 
        /* Backtracking code of this alternative; when it is exhausted,
           execution continues with whatever is emitted next. */
13383   compile_backtrackingpath(common, altbacktrack.top);
13384   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13385     return;
13386   set_jumps(altbacktrack.topbacktracks, LABEL());
13387 
        /* Stop when what follows is not another alternative. */
13388   if (*cc != OP_ALT)
13389     break;
13390 
        /* Advance to the next alternative. */
13391   altbacktrack.cc = cc + 1 + LINK_SIZE;
13392   cc += GET(cc, 1);
13393   }
13394 
13395 /* No alternative is matched. */
13396 
13397 quit = LABEL();
13398 
13399 copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, has_quit);
13400 
      /* Reload the return address, drop the whole frame and return with
         TMP1 = 0. */
13401 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
13402 free_stack(common, private_data_size + local_size);
13403 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
13404 OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13405 
      /* Target of the quit jumps collected while compiling the group: reset
         STACK_TOP to the recursion frame, then leave through the "no
         alternative matched" code above. */
13406 if (common->quit != NULL)
13407   {
13408   SLJIT_ASSERT(has_quit);
13409 
13410   set_jumps(common->quit, LABEL());
13411   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13412   copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, has_quit);
13413   JUMPTO(SLJIT_JUMP, quit);
13414   }
13415 
      /* Backtracking into a recursion that was left through accept (the -1
         marker was found at the backtrack entry): discard the marker frame
         and the recursion frame, then return with TMP1 = 0. TMP1 still
         holds the return address received at the backtrack entry. */
13416 if (has_accept)
13417   {
13418   JUMPHERE(accept_exit);
13419   free_stack(common, 2);
13420 
13421   /* Save return address. */
13422   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);
13423 
13424   copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, has_quit);
13425 
13426   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
13427   free_stack(common, private_data_size + local_size);
13428   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
13429   OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13430   }
13431 
      /* Target of the accept jumps collected while compiling the group:
         reset STACK_TOP to the recursion frame, push a two-slot frame whose
         slot 1 holds the -1 marker, and fall into the match exit below. */
13432 if (common->accept != NULL)
13433   {
13434   SLJIT_ASSERT(has_accept);
13435 
13436   set_jumps(common->accept, LABEL());
13437 
13438   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13439   OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
13440 
13441   allocate_stack(common, 2);
13442   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
13443   }
13444 
      /* Common exit for a successful match. TMP2 holds the recursion frame
         address on every path that reaches this point. */
13445 set_jumps(match, LABEL());
13446 
13447 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
13448 
13449 copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
13450 
      /* Fetch the return address saved in the recursion frame and return
         with TMP1 = 1. */
13451 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
13452 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
13453 OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13454 }
13455 
/* Retire these two helper macros so that the code below cannot use them. */
13456 #undef COMPILE_BACKTRACKINGPATH
13457 #undef CURRENT_AS
13458 
/* Bits of the jit_compile() mode argument that configure the compilation
   instead of selecting the compile mode. jit_compile() reads
   PCRE2_JIT_INVALID_UTF from mode and then clears every bit of this mask,
   so the remaining value can be compared against the PCRE2_JIT_COMPLETE /
   PCRE2_JIT_PARTIAL_* modes. */
13459 #define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS \
13460   (PCRE2_JIT_INVALID_UTF)
13461 
jit_compile(pcre2_code * code,sljit_u32 mode)13462 static int jit_compile(pcre2_code *code, sljit_u32 mode)
13463 {
13464 pcre2_real_code *re = (pcre2_real_code *)code;
13465 struct sljit_compiler *compiler;
13466 backtrack_common rootbacktrack;
13467 compiler_common common_data;
13468 compiler_common *common = &common_data;
13469 const sljit_u8 *tables = re->tables;
13470 void *allocator_data = &re->memctl;
13471 int private_data_size;
13472 PCRE2_SPTR ccend;
13473 executable_functions *functions;
13474 void *executable_func;
13475 sljit_uw executable_size;
13476 sljit_uw total_length;
13477 struct sljit_label *mainloop_label = NULL;
13478 struct sljit_label *continue_match_label;
13479 struct sljit_label *empty_match_found_label = NULL;
13480 struct sljit_label *empty_match_backtrack_label = NULL;
13481 struct sljit_label *reset_match_label;
13482 struct sljit_label *quit_label;
13483 struct sljit_jump *jump;
13484 struct sljit_jump *minlength_check_failed = NULL;
13485 struct sljit_jump *empty_match = NULL;
13486 struct sljit_jump *end_anchor_failed = NULL;
13487 jump_list *reqcu_not_found = NULL;
13488 
13489 SLJIT_ASSERT(tables);
13490 
13491 #if HAS_VIRTUAL_REGISTERS == 1
13492 SLJIT_ASSERT(sljit_get_register_index(TMP3) < 0 && sljit_get_register_index(ARGUMENTS) < 0 && sljit_get_register_index(RETURN_ADDR) < 0);
13493 #elif HAS_VIRTUAL_REGISTERS == 0
13494 SLJIT_ASSERT(sljit_get_register_index(TMP3) >= 0 && sljit_get_register_index(ARGUMENTS) >= 0 && sljit_get_register_index(RETURN_ADDR) >= 0);
13495 #else
13496 #error "Invalid value for HAS_VIRTUAL_REGISTERS"
13497 #endif
13498 
13499 memset(&rootbacktrack, 0, sizeof(backtrack_common));
13500 memset(common, 0, sizeof(compiler_common));
13501 common->re = re;
13502 common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
13503 rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
13504 
13505 #ifdef SUPPORT_UNICODE
13506 common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
13507 #endif /* SUPPORT_UNICODE */
13508 mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;
13509 
13510 common->start = rootbacktrack.cc;
13511 common->read_only_data_head = NULL;
13512 common->fcc = tables + fcc_offset;
13513 common->lcc = (sljit_sw)(tables + lcc_offset);
13514 common->mode = mode;
13515 common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
13516 common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
13517 common->nltype = NLTYPE_FIXED;
13518 switch(re->newline_convention)
13519   {
13520   case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
13521   case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
13522   case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
13523   case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
13524   case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
13525   case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
13526   default: return PCRE2_ERROR_INTERNAL;
13527   }
13528 common->nlmax = READ_CHAR_MAX;
13529 common->nlmin = 0;
13530 if (re->bsr_convention == PCRE2_BSR_UNICODE)
13531   common->bsr_nltype = NLTYPE_ANY;
13532 else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
13533   common->bsr_nltype = NLTYPE_ANYCRLF;
13534 else
13535   {
13536 #ifdef BSR_ANYCRLF
13537   common->bsr_nltype = NLTYPE_ANYCRLF;
13538 #else
13539   common->bsr_nltype = NLTYPE_ANY;
13540 #endif
13541   }
13542 common->bsr_nlmax = READ_CHAR_MAX;
13543 common->bsr_nlmin = 0;
13544 common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
13545 common->ctypes = (sljit_sw)(tables + ctypes_offset);
13546 common->name_count = re->name_count;
13547 common->name_entry_size = re->name_entry_size;
13548 common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
13549 common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
13550 #ifdef SUPPORT_UNICODE
13551 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
13552 common->utf = (re->overall_options & PCRE2_UTF) != 0;
13553 common->ucp = (re->overall_options & PCRE2_UCP) != 0;
13554 if (common->utf)
13555   {
13556   if (common->nltype == NLTYPE_ANY)
13557     common->nlmax = 0x2029;
13558   else if (common->nltype == NLTYPE_ANYCRLF)
13559     common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13560   else
13561     {
13562     /* We only care about the first newline character. */
13563     common->nlmax = common->newline & 0xff;
13564     }
13565 
13566   if (common->nltype == NLTYPE_FIXED)
13567     common->nlmin = common->newline & 0xff;
13568   else
13569     common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13570 
13571   if (common->bsr_nltype == NLTYPE_ANY)
13572     common->bsr_nlmax = 0x2029;
13573   else
13574     common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13575   common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13576   }
13577 else
13578   common->invalid_utf = FALSE;
13579 #endif /* SUPPORT_UNICODE */
13580 ccend = bracketend(common->start);
13581 
13582 /* Calculate the local space size on the stack. */
13583 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
13584 common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
13585 if (!common->optimized_cbracket)
13586   return PCRE2_ERROR_NOMEMORY;
13587 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
13588 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13589 #else
13590 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
13591 #endif
13592 
13593 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
13594 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
13595 common->capture_last_ptr = common->ovector_start;
13596 common->ovector_start += sizeof(sljit_sw);
13597 #endif
13598 if (!check_opcode_types(common, common->start, ccend))
13599   {
13600   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13601   return PCRE2_ERROR_NOMEMORY;
13602   }
13603 
13604 /* Checking flags and updating ovector_start. */
13605 if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13606   {
13607   common->req_char_ptr = common->ovector_start;
13608   common->ovector_start += sizeof(sljit_sw);
13609   }
13610 if (mode != PCRE2_JIT_COMPLETE)
13611   {
13612   common->start_used_ptr = common->ovector_start;
13613   common->ovector_start += sizeof(sljit_sw);
13614   if (mode == PCRE2_JIT_PARTIAL_SOFT)
13615     {
13616     common->hit_start = common->ovector_start;
13617     common->ovector_start += sizeof(sljit_sw);
13618     }
13619   }
13620 if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
13621   {
13622   common->match_end_ptr = common->ovector_start;
13623   common->ovector_start += sizeof(sljit_sw);
13624   }
13625 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
13626 common->control_head_ptr = 1;
13627 #endif
13628 if (common->control_head_ptr != 0)
13629   {
13630   common->control_head_ptr = common->ovector_start;
13631   common->ovector_start += sizeof(sljit_sw);
13632   }
13633 if (common->has_set_som)
13634   {
13635   /* Saving the real start pointer is necessary. */
13636   common->start_ptr = common->ovector_start;
13637   common->ovector_start += sizeof(sljit_sw);
13638   }
13639 
13640 /* Aligning ovector to even number of sljit words. */
13641 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
13642   common->ovector_start += sizeof(sljit_sw);
13643 
13644 if (common->start_ptr == 0)
13645   common->start_ptr = OVECTOR(0);
13646 
13647 /* Capturing brackets cannot be optimized if callouts are allowed. */
13648 if (common->capture_last_ptr != 0)
13649   memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13650 
13651 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
13652 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
13653 
13654 total_length = ccend - common->start;
13655 common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
13656 if (!common->private_data_ptrs)
13657   {
13658   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13659   return PCRE2_ERROR_NOMEMORY;
13660   }
13661 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
13662 
13663 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
13664 set_private_data_ptrs(common, &private_data_size, ccend);
13665 if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
13666   detect_early_fail(common, common->start, &private_data_size, 0, 0);
13667 
13668 SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
13669 
13670 if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
13671   {
13672   SLJIT_FREE(common->private_data_ptrs, allocator_data);
13673   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13674   return PCRE2_ERROR_NOMEMORY;
13675   }
13676 
13677 if (common->has_then)
13678   {
13679   common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
13680   memset(common->then_offsets, 0, total_length);
13681   set_then_offsets(common, common->start, NULL);
13682   }
13683 
13684 compiler = sljit_create_compiler(allocator_data, NULL);
13685 if (!compiler)
13686   {
13687   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13688   SLJIT_FREE(common->private_data_ptrs, allocator_data);
13689   return PCRE2_ERROR_NOMEMORY;
13690   }
13691 common->compiler = compiler;
13692 
13693 /* Main pcre_jit_exec entry. */
13694 sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size);
13695 
13696 /* Register init. */
13697 reset_ovector(common, (re->top_bracket + 1) * 2);
13698 if (common->req_char_ptr != 0)
13699   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
13700 
13701 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
13702 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
13703 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13704 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
13705 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
13706 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
13707 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
13708 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
13709 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
13710 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
13711 
13712 if (common->early_fail_start_ptr < common->early_fail_end_ptr)
13713   reset_early_fail(common);
13714 
13715 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13716   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13717 if (common->mark_ptr != 0)
13718   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
13719 if (common->control_head_ptr != 0)
13720   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13721 
13722 /* Main part of the matching */
13723 if ((re->overall_options & PCRE2_ANCHORED) == 0)
13724   {
13725   mainloop_label = mainloop_entry(common);
13726   continue_match_label = LABEL();
13727   /* Forward search if possible. */
13728   if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13729     {
13730     if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
13731       ;
13732     else if ((re->flags & PCRE2_FIRSTSET) != 0)
13733       fast_forward_first_char(common);
13734     else if ((re->flags & PCRE2_STARTLINE) != 0)
13735       fast_forward_newline(common);
13736     else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
13737       fast_forward_start_bits(common);
13738     }
13739   }
13740 else
13741   continue_match_label = LABEL();
13742 
13743 if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13744   {
13745   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13746   OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
13747   minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
13748   }
13749 if (common->req_char_ptr != 0)
13750   reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
13751 
13752 /* Store the current STR_PTR in OVECTOR(0). */
13753 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
13754 /* Copy the limit of allowed recursions. */
13755 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
13756 if (common->capture_last_ptr != 0)
13757   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
13758 if (common->fast_forward_bc_ptr != NULL)
13759   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);
13760 
13761 if (common->start_ptr != OVECTOR(0))
13762   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
13763 
13764 /* Copy the beginning of the string. */
13765 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13766   {
13767   jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13768   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
13769   JUMPHERE(jump);
13770   }
13771 else if (mode == PCRE2_JIT_PARTIAL_HARD)
13772   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
13773 
13774 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
13775 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13776   {
13777   sljit_free_compiler(compiler);
13778   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13779   SLJIT_FREE(common->private_data_ptrs, allocator_data);
13780   PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13781   return PCRE2_ERROR_NOMEMORY;
13782   }
13783 
13784 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
13785   end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
13786 
13787 if (common->might_be_empty)
13788   {
13789   empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
13790   empty_match_found_label = LABEL();
13791   }
13792 
13793 common->accept_label = LABEL();
13794 if (common->accept != NULL)
13795   set_jumps(common->accept, common->accept_label);
13796 
13797 /* This means we have a match. Update the ovector. */
13798 copy_ovector(common, re->top_bracket + 1);
13799 common->quit_label = common->abort_label = LABEL();
13800 if (common->quit != NULL)
13801   set_jumps(common->quit, common->quit_label);
13802 if (common->abort != NULL)
13803   set_jumps(common->abort, common->abort_label);
13804 if (minlength_check_failed != NULL)
13805   SET_LABEL(minlength_check_failed, common->abort_label);
13806 
13807 sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
13808 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
13809 
13810 if (common->failed_match != NULL)
13811   {
13812   SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
13813   set_jumps(common->failed_match, LABEL());
13814   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13815   JUMPTO(SLJIT_JUMP, common->abort_label);
13816   }
13817 
13818 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
13819   JUMPHERE(end_anchor_failed);
13820 
13821 if (mode != PCRE2_JIT_COMPLETE)
13822   {
13823   common->partialmatchlabel = LABEL();
13824   set_jumps(common->partialmatch, common->partialmatchlabel);
13825   return_with_partial_match(common, common->quit_label);
13826   }
13827 
13828 if (common->might_be_empty)
13829   empty_match_backtrack_label = LABEL();
13830 compile_backtrackingpath(common, rootbacktrack.top);
13831 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13832   {
13833   sljit_free_compiler(compiler);
13834   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13835   SLJIT_FREE(common->private_data_ptrs, allocator_data);
13836   PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13837   return PCRE2_ERROR_NOMEMORY;
13838   }
13839 
13840 SLJIT_ASSERT(rootbacktrack.prev == NULL);
13841 reset_match_label = LABEL();
13842 
13843 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13844   {
13845   /* Update hit_start only in the first time. */
13846   jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
13847   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
13848   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
13849   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
13850   JUMPHERE(jump);
13851   }
13852 
13853 /* Check we have remaining characters. */
13854 if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
13855   {
13856   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
13857   }
13858 
13859 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
13860     (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);
13861 
13862 if ((re->overall_options & PCRE2_ANCHORED) == 0)
13863   {
13864   if (common->ff_newline_shortcut != NULL)
13865     {
13866     /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
13867     if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
13868       {
13869       if (common->match_end_ptr != 0)
13870         {
13871         OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
13872         OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
13873         CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
13874         OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
13875         }
13876       else
13877         CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
13878       }
13879     }
13880   else
13881     CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
13882   }
13883 
13884 /* No more remaining characters. */
13885 if (reqcu_not_found != NULL)
13886   set_jumps(reqcu_not_found, LABEL());
13887 
13888 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13889   CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
13890 
13891 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13892 JUMPTO(SLJIT_JUMP, common->quit_label);
13893 
13894 flush_stubs(common);
13895 
13896 if (common->might_be_empty)
13897   {
13898   JUMPHERE(empty_match);
13899   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
13900   OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
13901   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
13902   JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
13903   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
13904   JUMPTO(SLJIT_ZERO, empty_match_found_label);
13905   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13906   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
13907   JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
13908   }
13909 
13910 common->fast_forward_bc_ptr = NULL;
13911 common->early_fail_start_ptr = 0;
13912 common->early_fail_end_ptr = 0;
13913 common->currententry = common->entries;
13914 common->local_quit_available = TRUE;
13915 quit_label = common->quit_label;
13916 while (common->currententry != NULL)
13917   {
13918   /* Might add new entries. */
13919   compile_recurse(common);
13920   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13921     {
13922     sljit_free_compiler(compiler);
13923     SLJIT_FREE(common->optimized_cbracket, allocator_data);
13924     SLJIT_FREE(common->private_data_ptrs, allocator_data);
13925     PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13926     return PCRE2_ERROR_NOMEMORY;
13927     }
13928   flush_stubs(common);
13929   common->currententry = common->currententry->next;
13930   }
13931 common->local_quit_available = FALSE;
13932 common->quit_label = quit_label;
13933 
13934 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
13935 /* This is a (really) rare case. */
13936 set_jumps(common->stackalloc, LABEL());
13937 /* RETURN_ADDR is not a saved register. */
13938 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
13939 
13940 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13941 
13942 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
13943 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
13944 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
13945 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
13946 OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
13947 
13948 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
13949 
13950 jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
13951 OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
13952 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
13953 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
13954 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
13955 OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
13956 
13957 /* Allocation failed. */
13958 JUMPHERE(jump);
13959 /* We break the return address cache here, but this is a really rare case. */
13960 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
13961 JUMPTO(SLJIT_JUMP, common->quit_label);
13962 
13963 /* Call limit reached. */
13964 set_jumps(common->calllimit, LABEL());
13965 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
13966 JUMPTO(SLJIT_JUMP, common->quit_label);
13967 
13968 if (common->revertframes != NULL)
13969   {
13970   set_jumps(common->revertframes, LABEL());
13971   do_revertframes(common);
13972   }
13973 if (common->wordboundary != NULL)
13974   {
13975   set_jumps(common->wordboundary, LABEL());
13976   check_wordboundary(common);
13977   }
13978 if (common->anynewline != NULL)
13979   {
13980   set_jumps(common->anynewline, LABEL());
13981   check_anynewline(common);
13982   }
13983 if (common->hspace != NULL)
13984   {
13985   set_jumps(common->hspace, LABEL());
13986   check_hspace(common);
13987   }
13988 if (common->vspace != NULL)
13989   {
13990   set_jumps(common->vspace, LABEL());
13991   check_vspace(common);
13992   }
13993 if (common->casefulcmp != NULL)
13994   {
13995   set_jumps(common->casefulcmp, LABEL());
13996   do_casefulcmp(common);
13997   }
13998 if (common->caselesscmp != NULL)
13999   {
14000   set_jumps(common->caselesscmp, LABEL());
14001   do_caselesscmp(common);
14002   }
14003 if (common->reset_match != NULL)
14004   {
14005   set_jumps(common->reset_match, LABEL());
14006   do_reset_match(common, (re->top_bracket + 1) * 2);
14007   CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
14008   OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
14009   JUMPTO(SLJIT_JUMP, reset_match_label);
14010   }
14011 #ifdef SUPPORT_UNICODE
14012 #if PCRE2_CODE_UNIT_WIDTH == 8
14013 if (common->utfreadchar != NULL)
14014   {
14015   set_jumps(common->utfreadchar, LABEL());
14016   do_utfreadchar(common);
14017   }
14018 if (common->utfreadtype8 != NULL)
14019   {
14020   set_jumps(common->utfreadtype8, LABEL());
14021   do_utfreadtype8(common);
14022   }
14023 if (common->utfpeakcharback != NULL)
14024   {
14025   set_jumps(common->utfpeakcharback, LABEL());
14026   do_utfpeakcharback(common);
14027   }
14028 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
14029 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
14030 if (common->utfreadchar_invalid != NULL)
14031   {
14032   set_jumps(common->utfreadchar_invalid, LABEL());
14033   do_utfreadchar_invalid(common);
14034   }
14035 if (common->utfreadnewline_invalid != NULL)
14036   {
14037   set_jumps(common->utfreadnewline_invalid, LABEL());
14038   do_utfreadnewline_invalid(common);
14039   }
14040 if (common->utfmoveback_invalid)
14041   {
14042   set_jumps(common->utfmoveback_invalid, LABEL());
14043   do_utfmoveback_invalid(common);
14044   }
14045 if (common->utfpeakcharback_invalid)
14046   {
14047   set_jumps(common->utfpeakcharback_invalid, LABEL());
14048   do_utfpeakcharback_invalid(common);
14049   }
14050 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
14051 if (common->getucd != NULL)
14052   {
14053   set_jumps(common->getucd, LABEL());
14054   do_getucd(common);
14055   }
14056 if (common->getucdtype != NULL)
14057   {
14058   set_jumps(common->getucdtype, LABEL());
14059   do_getucdtype(common);
14060   }
14061 #endif /* SUPPORT_UNICODE */
14062 
14063 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14064 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14065 
14066 executable_func = sljit_generate_code(compiler);
14067 executable_size = sljit_get_generated_code_size(compiler);
14068 sljit_free_compiler(compiler);
14069 
14070 if (executable_func == NULL)
14071   {
14072   PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14073   return PCRE2_ERROR_NOMEMORY;
14074   }
14075 
14076 /* Reuse the function descriptor if possible. */
14077 if (re->executable_jit != NULL)
14078   functions = (executable_functions *)re->executable_jit;
14079 else
14080   {
14081   functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
14082   if (functions == NULL)
14083     {
14084     /* This case is highly unlikely since we just recently
14085     freed a lot of memory. Not impossible though. */
14086     sljit_free_code(executable_func, NULL);
14087     PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14088     return PCRE2_ERROR_NOMEMORY;
14089     }
14090   memset(functions, 0, sizeof(executable_functions));
14091   functions->top_bracket = re->top_bracket + 1;
14092   functions->limit_match = re->limit_match;
14093   re->executable_jit = functions;
14094   }
14095 
14096 /* Turn mode into an index. */
14097 if (mode == PCRE2_JIT_COMPLETE)
14098   mode = 0;
14099 else
14100   mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
14101 
14102 SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
14103 functions->executable_funcs[mode] = executable_func;
14104 functions->read_only_data_heads[mode] = common->read_only_data_head;
14105 functions->executable_sizes[mode] = executable_size;
14106 return 0;
14107 }
14108 
14109 #endif
14110 
14111 /*************************************************
14112 *        JIT compile a Regular Expression        *
14113 *************************************************/
14114 
/* This function uses JIT to convert a previously-compiled pattern into machine
code.
14117 
14118 Arguments:
14119   code          a compiled pattern
14120   options       JIT option bits
14121 
14122 Returns:        0: success or (*NOJIT) was used
14123                <0: an error code
14124 */
14125 
14126 #define PUBLIC_JIT_COMPILE_OPTIONS \
14127   (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF)
14128 
14129 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_compile(pcre2_code * code,uint32_t options)14130 pcre2_jit_compile(pcre2_code *code, uint32_t options)
14131 {
14132 pcre2_real_code *re = (pcre2_real_code *)code;
14133 
14134 if (code == NULL)
14135   return PCRE2_ERROR_NULL;
14136 
14137 if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14138   return PCRE2_ERROR_JIT_BADOPTION;
14139 
14140 /* Support for invalid UTF was first introduced in JIT, with the option
14141 PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
14142 compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
14143 preferred feature, with the earlier option deprecated. However, for backward
14144 compatibility, if the earlier option is set, it forces the new option so that
14145 if JIT matching falls back to the interpreter, there is still support for
14146 invalid UTF. However, if this function has already been successfully called
14147 without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
14148 non-invalid-supporting JIT code was compiled), give an error.
14149 
14150 If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
14151 actions are needed:
14152 
14153   1. Remove the definition from pcre2.h.in and from the list in
14154      PUBLIC_JIT_COMPILE_OPTIONS above.
14155 
14156   2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
14157 
14158   3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
14159 
14160   4. Delete the following short block of code. The setting of "re" and
14161      "functions" can be moved into the JIT-only block below, but if that is
14162      done, (void)re and (void)functions will be needed in the non-JIT case, to
14163      avoid compiler warnings.
14164 */
14165 
14166 #ifdef SUPPORT_JIT
14167 executable_functions *functions = (executable_functions *)re->executable_jit;
14168 static int executable_allocator_is_working = 0;
14169 #endif
14170 
14171 if ((options & PCRE2_JIT_INVALID_UTF) != 0)
14172   {
14173   if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
14174     {
14175 #ifdef SUPPORT_JIT
14176     if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
14177 #endif
14178     re->overall_options |= PCRE2_MATCH_INVALID_UTF;
14179     }
14180   }
14181 
14182 /* The above tests are run with and without JIT support. This means that
14183 PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
14184 interpreter support) even in the absence of JIT. But now, if there is no JIT
14185 support, give an error return. */
14186 
14187 #ifndef SUPPORT_JIT
14188 return PCRE2_ERROR_JIT_BADOPTION;
14189 #else  /* SUPPORT_JIT */
14190 
14191 /* There is JIT support. Do the necessary. */
14192 
14193 if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14194 
14195 if (executable_allocator_is_working == 0)
14196   {
14197   /* Checks whether the executable allocator is working. This check
14198      might run multiple times in multi-threaded environments, but the
14199      result should not be affected by it. */
14200   void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
14201 
14202   executable_allocator_is_working = -1;
14203 
14204   if (ptr != NULL)
14205     {
14206     SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
14207     executable_allocator_is_working = 1;
14208     }
14209   }
14210 
14211 if (executable_allocator_is_working < 0)
14212   return PCRE2_ERROR_NOMEMORY;
14213 
14214 if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
14215   options |= PCRE2_JIT_INVALID_UTF;
14216 
14217 if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14218     || functions->executable_funcs[0] == NULL)) {
14219   uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14220   int result = jit_compile(code, options & ~excluded_options);
14221   if (result != 0)
14222     return result;
14223   }
14224 
14225 if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14226     || functions->executable_funcs[1] == NULL)) {
14227   uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14228   int result = jit_compile(code, options & ~excluded_options);
14229   if (result != 0)
14230     return result;
14231   }
14232 
14233 if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14234     || functions->executable_funcs[2] == NULL)) {
14235   uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14236   int result = jit_compile(code, options & ~excluded_options);
14237   if (result != 0)
14238     return result;
14239   }
14240 
14241 return 0;
14242 
14243 #endif  /* SUPPORT_JIT */
14244 }
14245 
14246 /* JIT compiler uses an all-in-one approach. This improves security,
14247    since the code generator functions are not exported. */
14248 
14249 #define INCLUDED_FROM_PCRE2_JIT_COMPILE
14250 
14251 #include "pcre2_jit_match.c"
14252 #include "pcre2_jit_misc.c"
14253 
14254 /* End of pcre2_jit_compile.c */
14255