• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10           New API code Copyright (c) 2016-2018 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 #ifdef HAVE_CONFIG_H
42 #include "config.h"
43 #endif
44 
45 #include "pcre2_internal.h"
46 
47 #ifdef SUPPORT_JIT
48 
49 /* All-in-one: Since we use the JIT compiler only from here,
50 we just include it. This way we don't need to touch the build
51 system files. */
52 
53 #define SLJIT_CONFIG_AUTO 1
54 #define SLJIT_CONFIG_STATIC 1
55 #define SLJIT_VERBOSE 0
56 
57 #ifdef PCRE2_DEBUG
58 #define SLJIT_DEBUG 1
59 #else
60 #define SLJIT_DEBUG 0
61 #endif
62 
63 #define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
64 #define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
65 
pcre2_jit_malloc(size_t size,void * allocator_data)66 static void * pcre2_jit_malloc(size_t size, void *allocator_data)
67 {
68 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
69 return allocator->malloc(size, allocator->memory_data);
70 }
71 
pcre2_jit_free(void * ptr,void * allocator_data)72 static void pcre2_jit_free(void *ptr, void *allocator_data)
73 {
74 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
75 allocator->free(ptr, allocator->memory_data);
76 }
77 
78 #include "sljit/sljitLir.c"
79 
80 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
81 #error Unsupported architecture
82 #endif
83 
84 /* Defines for debugging purposes. */
85 
86 /* 1 - Use unoptimized capturing brackets.
87    2 - Enable capture_last_ptr (includes option 1). */
88 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
89 
90 /* 1 - Always have a control head. */
91 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
92 
93 /* Allocate memory for the regex stack on the real machine stack.
94 Fast, but limited size. */
95 #define MACHINE_STACK_SIZE 32768
96 
97 /* Growth rate for stack allocated by the OS. Should be a multiple
98 of the page size. */
99 #define STACK_GROWTH_RATE 8192
100 
101 /* Enable to check that the allocation could destroy temporaries. */
102 #if defined SLJIT_DEBUG && SLJIT_DEBUG
103 #define DESTROY_REGISTERS 1
104 #endif
105 
106 /*
107 Short summary about the backtracking mechanism employed by the jit code generator:
108 
109 The code generator follows the recursive nature of the PERL compatible regular
110 expressions. The basic blocks of regular expressions are condition checkers
111 which execute different commands depending on the result of the condition check.
112 The relationship between the operators can be horizontal (concatenation) and
113 vertical (sub-expression) (See struct backtrack_common for more details).
114 
115   'ab' - 'a' and 'b' regexps are concatenated
116   'a+' - 'a' is the sub-expression of the '+' operator
117 
118 The condition checkers are boolean (true/false) checkers. Machine code is generated
119 for the checker itself and for the actions depending on the result of the checker.
120 The 'true' case is called the matching path (expected path), and the other is called
121 the 'backtrack' path. Branch instructions are expensive on all CPUs, so we avoid taken
122 branches on the matching path.
123 
124  Greedy star operator (*) :
125    Matching path: match happens.
126    Backtrack path: match failed.
127  Non-greedy star operator (*?) :
128    Matching path: no need to perform a match.
129    Backtrack path: match is required.
130 
131 The following example shows how the code is generated for a capturing bracket
132 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
133 we have the following regular expression:
134 
135    A(B|C)D
136 
137 The generated code will be the following:
138 
139  A matching path
140  '(' matching path (pushing arguments to the stack)
141  B matching path
142  ')' matching path (pushing arguments to the stack)
143  D matching path
144  return with successful match
145 
146  D backtrack path
147  ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
148  B backtrack path
149  C expected path
150  jump to D matching path
151  C backtrack path
152  A backtrack path
153 
154  Notice that the order of the backtrack code paths is the opposite of the fast
155  code paths. In this way the topmost value on the stack always belongs
156  to the current backtrack code path. The backtrack path must check
157  whether there is a next alternative. If so, it needs to jump back to
158  the matching path eventually. Otherwise it needs to clear out its own stack
159  frame and continue the execution on the backtrack code paths.
160 */
161 
162 /*
163 Saved stack frames:
164 
165 Atomic blocks and asserts require reloading the values of private data
166 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
167 are not necessarily known at compile time, thus we need a dynamic restore
168 mechanism.
169 
170 The stack frames are stored in a chain list, and have the following format:
171 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
172 
173 Thus we can restore the private data to a particular point in the stack.
174 */
175 
176 typedef struct jit_arguments {
177   /* Pointers first. */
178   struct sljit_stack *stack;
179   PCRE2_SPTR str;
180   PCRE2_SPTR begin;
181   PCRE2_SPTR end;
182   pcre2_match_data *match_data;
183   PCRE2_SPTR startchar_ptr;
184   PCRE2_UCHAR *mark_ptr;
185   int (*callout)(pcre2_callout_block *, void *);
186   void *callout_data;
187   /* Everything else after. */
188   sljit_uw offset_limit;
189   sljit_u32 limit_match;
190   sljit_u32 oveccount;
191   sljit_u32 options;
192 } jit_arguments;
193 
194 #define JIT_NUMBER_OF_COMPILE_MODES 3
195 
196 typedef struct executable_functions {
197   void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
198   void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
199   sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
200   sljit_u32 top_bracket;
201   sljit_u32 limit_match;
202 } executable_functions;
203 
204 typedef struct jump_list {
205   struct sljit_jump *jump;
206   struct jump_list *next;
207 } jump_list;
208 
209 typedef struct stub_list {
210   struct sljit_jump *start;
211   struct sljit_label *quit;
212   struct stub_list *next;
213 } stub_list;
214 
215 typedef struct label_addr_list {
216   struct sljit_label *label;
217   sljit_uw *update_addr;
218   struct label_addr_list *next;
219 } label_addr_list;
220 
221 enum frame_types {
222   no_frame = -1,
223   no_stack = -2
224 };
225 
226 enum control_types {
227   type_mark = 0,
228   type_then_trap = 1
229 };
230 
231 typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
232 
233 /* The following structure is the key data type for the recursive
234 code generator. It is allocated by compile_matchingpath, and contains
235 the arguments for compile_backtrackingpath. Must be the first member
236 of its descendants. */
237 typedef struct backtrack_common {
238   /* Concatenation stack. */
239   struct backtrack_common *prev;
240   jump_list *nextbacktracks;
241   /* Internal stack (for component operators). */
242   struct backtrack_common *top;
243   jump_list *topbacktracks;
244   /* Opcode pointer. */
245   PCRE2_SPTR cc;
246 } backtrack_common;
247 
248 typedef struct assert_backtrack {
249   backtrack_common common;
250   jump_list *condfailed;
251   /* Less than 0 if a frame is not needed. */
252   int framesize;
253   /* Points to our private memory word on the stack. */
254   int private_data_ptr;
255   /* For iterators. */
256   struct sljit_label *matchingpath;
257 } assert_backtrack;
258 
/* Backtrack record for bracket opcodes. It is a descendant of
backtrack_common, which therefore has to stay the first member. */
259 typedef struct bracket_backtrack {
260   backtrack_common common;
261   /* Where to continue if an alternative is successfully matched. */
262   struct sljit_label *alternative_matchingpath;
263   /* For rmin and rmax iterators. */
264   struct sljit_label *recursive_matchingpath;
265   /* For greedy ? operator. */
266   struct sljit_label *zero_matchingpath;
267   /* Contains the branches of a failed condition. The members of this union
       share storage, so only the one matching the bracket type is meaningful. */
268   union {
269     /* Both for OP_COND, OP_SCOND. */
270     jump_list *condfailed;
271     assert_backtrack *assert;
272     /* For OP_ONCE. Less than 0 if not needed. */
273     int framesize;
274   } u;
275   /* Points to our private memory word on the stack. */
276   int private_data_ptr;
277 } bracket_backtrack;
278 
279 typedef struct bracketpos_backtrack {
280   backtrack_common common;
281   /* Points to our private memory word on the stack. */
282   int private_data_ptr;
283   /* Reverting stack is needed. */
284   int framesize;
285   /* Allocated stack size. */
286   int stacksize;
287 } bracketpos_backtrack;
288 
289 typedef struct braminzero_backtrack {
290   backtrack_common common;
291   struct sljit_label *matchingpath;
292 } braminzero_backtrack;
293 
294 typedef struct char_iterator_backtrack {
295   backtrack_common common;
296   /* Next iteration. */
297   struct sljit_label *matchingpath;
298   union {
299     jump_list *backtracks;
300     struct {
301       unsigned int othercasebit;
302       PCRE2_UCHAR chr;
303       BOOL enabled;
304     } charpos;
305   } u;
306 } char_iterator_backtrack;
307 
308 typedef struct ref_iterator_backtrack {
309   backtrack_common common;
310   /* Next iteration. */
311   struct sljit_label *matchingpath;
312 } ref_iterator_backtrack;
313 
/* One node of a singly linked list (chained through next) that records the
labels and the pending calls of a recursively called pattern starting at
the opcode offset stored in start. */
314 typedef struct recurse_entry {
315   struct recurse_entry *next;
316   /* Contains the function entry label. */
317   struct sljit_label *entry_label;
318   /* Contains the function backtrack label.
       NOTE(review): the previous comment repeated the entry_label text;
       confirm the wording against the code that sets this label. */
319   struct sljit_label *backtrack_label;
320   /* Collects the entry calls until the function is created. */
321   jump_list *entry_calls;
322   /* Collects the backtrack calls until the function is created. */
323   jump_list *backtrack_calls;
324   /* Points to the starting opcode. */
325   sljit_sw start;
326 } recurse_entry;
327 
328 typedef struct recurse_backtrack {
329   backtrack_common common;
330   /* Return to the matching path. */
331   struct sljit_label *matchingpath;
332   /* Recursive pattern. */
333   recurse_entry *entry;
334   /* Pattern is inlined. */
335   BOOL inlined_pattern;
336 } recurse_backtrack;
337 
338 #define OP_THEN_TRAP OP_TABLE_LENGTH
339 
340 typedef struct then_trap_backtrack {
341   backtrack_common common;
342   /* If then_trap is not NULL, this structure contains the real
343   then_trap for the backtracking path. */
344   struct then_trap_backtrack *then_trap;
345   /* Points to the starting opcode. */
346   sljit_sw start;
347   /* Exit point for the then opcodes of this alternative. */
348   jump_list *quit;
349   /* Frame size of the current alternative. */
350   int framesize;
351 } then_trap_backtrack;
352 
353 #define MAX_N_CHARS 12
354 #define MAX_DIFF_CHARS 5
355 
356 typedef struct fast_forward_char_data {
357   /* Number of characters in the chars array, 255 for any character. */
358   sljit_u8 count;
359   /* Number of last UTF-8 characters in the chars array. */
360   sljit_u8 last_count;
361   /* Available characters in the current position. */
362   PCRE2_UCHAR chars[MAX_DIFF_CHARS];
363 } fast_forward_char_data;
364 
365 #define MAX_CLASS_RANGE_SIZE 4
366 #define MAX_CLASS_CHARS_SIZE 3
367 
368 typedef struct compiler_common {
369   /* The sljit generic compiler. */
370   struct sljit_compiler *compiler;
371   /* Compiled regular expression. */
372   pcre2_real_code *re;
373   /* First byte code. */
374   PCRE2_SPTR start;
375   /* Maps private data offset to each opcode. */
376   sljit_s32 *private_data_ptrs;
377   /* Chain list of read-only data ptrs. */
378   void *read_only_data_head;
379   /* Tells whether the capturing bracket is optimized. */
380   sljit_u8 *optimized_cbracket;
381   /* Tells whether the starting offset is a target of then. */
382   sljit_u8 *then_offsets;
383   /* Current position where a THEN must jump. */
384   then_trap_backtrack *then_trap;
385   /* Starting offset of private data for capturing brackets. */
386   sljit_s32 cbra_ptr;
387   /* Output vector starting point. Must be divisible by 2. */
388   sljit_s32 ovector_start;
389   /* Points to the starting character of the current match. */
390   sljit_s32 start_ptr;
391   /* Last known position of the requested byte. */
392   sljit_s32 req_char_ptr;
393   /* Head of the last recursion. */
394   sljit_s32 recursive_head_ptr;
395   /* First inspected character for partial matching.
396      (Needed for avoiding zero length partial matches.) */
397   sljit_s32 start_used_ptr;
398   /* Starting pointer for partial soft matches. */
399   sljit_s32 hit_start;
400   /* Pointer of the match end position. */
401   sljit_s32 match_end_ptr;
402   /* Points to the marked string. */
403   sljit_s32 mark_ptr;
404   /* Recursive control verb management chain. */
405   sljit_s32 control_head_ptr;
406   /* Points to the last matched capture block index. */
407   sljit_s32 capture_last_ptr;
408   /* Fast forward skipping byte code pointer. */
409   PCRE2_SPTR fast_forward_bc_ptr;
410   /* Locals used by fast fail optimization. */
411   sljit_s32 fast_fail_start_ptr;
412   sljit_s32 fast_fail_end_ptr;
413 
414   /* Flipped and lower case tables. */
415   const sljit_u8 *fcc;
416   sljit_sw lcc;
417   /* Mode can be PCRE2_JIT_COMPLETE and others. */
418   int mode;
419   /* TRUE, when minlength is greater than 0. */
420   BOOL might_be_empty;
421   /* \K is found in the pattern. */
422   BOOL has_set_som;
423   /* (*SKIP:arg) is found in the pattern. */
424   BOOL has_skip_arg;
425   /* (*THEN) is found in the pattern. */
426   BOOL has_then;
427   /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
428   BOOL has_skip_in_assert_back;
429   /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
430   BOOL local_quit_available;
431   /* Currently in a positive assertion. */
432   BOOL in_positive_assertion;
433   /* Newline control. */
434   int nltype;
435   sljit_u32 nlmax;
436   sljit_u32 nlmin;
437   int newline;
438   int bsr_nltype;
439   sljit_u32 bsr_nlmax;
440   sljit_u32 bsr_nlmin;
441   /* Dollar endonly. */
442   int endonly;
443   /* Tables. */
444   sljit_sw ctypes;
445   /* Named capturing brackets. */
446   PCRE2_SPTR name_table;
447   sljit_sw name_count;
448   sljit_sw name_entry_size;
449 
450   /* Labels and jump lists. */
451   struct sljit_label *partialmatchlabel;
452   struct sljit_label *quit_label;
453   struct sljit_label *abort_label;
454   struct sljit_label *accept_label;
455   struct sljit_label *ff_newline_shortcut;
456   stub_list *stubs;
457   label_addr_list *label_addrs;
458   recurse_entry *entries;
459   recurse_entry *currententry;
460   jump_list *partialmatch;
461   jump_list *quit;
462   jump_list *positive_assertion_quit;
463   jump_list *abort;
464   jump_list *failed_match;
465   jump_list *accept;
466   jump_list *calllimit;
467   jump_list *stackalloc;
468   jump_list *revertframes;
469   jump_list *wordboundary;
470   jump_list *anynewline;
471   jump_list *hspace;
472   jump_list *vspace;
473   jump_list *casefulcmp;
474   jump_list *caselesscmp;
475   jump_list *reset_match;
476   BOOL unset_backref;
477   BOOL alt_circumflex;
478 #ifdef SUPPORT_UNICODE
479   BOOL utf;
480   BOOL use_ucp;
481   jump_list *getucd;
482 #if PCRE2_CODE_UNIT_WIDTH == 8
483   jump_list *utfreadchar;
484   jump_list *utfreadchar16;
485   jump_list *utfreadtype8;
486 #endif
487 #endif /* SUPPORT_UNICODE */
488 } compiler_common;
489 
490 /* For byte_sequence_compare. */
491 
492 typedef struct compare_context {
493   int length;
494   int sourcereg;
495 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
496   int ucharptr;
497   union {
498     sljit_s32 asint;
499     sljit_u16 asushort;
500 #if PCRE2_CODE_UNIT_WIDTH == 8
501     sljit_u8 asbyte;
502     sljit_u8 asuchars[4];
503 #elif PCRE2_CODE_UNIT_WIDTH == 16
504     sljit_u16 asuchars[2];
505 #elif PCRE2_CODE_UNIT_WIDTH == 32
506     sljit_u32 asuchars[1];
507 #endif
508   } c;
509   union {
510     sljit_s32 asint;
511     sljit_u16 asushort;
512 #if PCRE2_CODE_UNIT_WIDTH == 8
513     sljit_u8 asbyte;
514     sljit_u8 asuchars[4];
515 #elif PCRE2_CODE_UNIT_WIDTH == 16
516     sljit_u16 asuchars[2];
517 #elif PCRE2_CODE_UNIT_WIDTH == 32
518     sljit_u32 asuchars[1];
519 #endif
520   } oc;
521 #endif
522 } compare_context;
523 
524 /* Undefine sljit macros. */
525 #undef CMP
526 
527 /* Used for accessing the elements of the stack. */
528 #define STACK(i)      ((i) * (int)sizeof(sljit_sw))
529 
530 #ifdef SLJIT_PREF_SHIFT_REG
531 #if SLJIT_PREF_SHIFT_REG == SLJIT_R2
532 /* Nothing. */
533 #elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
534 #define SHIFT_REG_IS_R3
535 #else
536 #error "Unsupported shift register"
537 #endif
538 #endif
539 
540 #define TMP1          SLJIT_R0
541 #ifdef SHIFT_REG_IS_R3
542 #define TMP2          SLJIT_R3
543 #define TMP3          SLJIT_R2
544 #else
545 #define TMP2          SLJIT_R2
546 #define TMP3          SLJIT_R3
547 #endif
548 #define STR_PTR       SLJIT_R1
549 #define STR_END       SLJIT_S0
550 #define STACK_TOP     SLJIT_S1
551 #define STACK_LIMIT   SLJIT_S2
552 #define COUNT_MATCH   SLJIT_S3
553 #define ARGUMENTS     SLJIT_S4
554 #define RETURN_ADDR   SLJIT_R4
555 
556 /* Local space layout. */
557 /* These two locals can be used by the current opcode. */
558 #define LOCALS0          (0 * sizeof(sljit_sw))
559 #define LOCALS1          (1 * sizeof(sljit_sw))
560 /* Two local variables for possessive quantifiers (char1 cannot use them). */
561 #define POSSESSIVE0      (2 * sizeof(sljit_sw))
562 #define POSSESSIVE1      (3 * sizeof(sljit_sw))
563 /* Max limit of recursions. */
564 #define LIMIT_MATCH      (4 * sizeof(sljit_sw))
565 /* The output vector is stored on the stack, and contains pointers
566 to characters. The vector data is divided into two groups: the first
567 group contains the start / end character pointers, and the second is
568 the start pointers when the end of the capturing group has not yet reached. */
569 #define OVECTOR_START    (common->ovector_start)
570 #define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
571 #define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
572 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
573 
574 #if PCRE2_CODE_UNIT_WIDTH == 8
575 #define MOV_UCHAR  SLJIT_MOV_U8
576 #define IN_UCHARS(x) (x)
577 #elif PCRE2_CODE_UNIT_WIDTH == 16
578 #define MOV_UCHAR  SLJIT_MOV_U16
579 #define UCHAR_SHIFT (1)
580 #define IN_UCHARS(x) ((x) * 2)
581 #elif PCRE2_CODE_UNIT_WIDTH == 32
582 #define MOV_UCHAR  SLJIT_MOV_U32
583 #define UCHAR_SHIFT (2)
584 #define IN_UCHARS(x) ((x) * 4)
585 #else
586 #error Unsupported compiling mode
587 #endif
588 
589 /* Shortcuts. */
590 #define DEFINE_COMPILER \
591   struct sljit_compiler *compiler = common->compiler
592 #define OP1(op, dst, dstw, src, srcw) \
593   sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
594 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
595   sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
596 #define LABEL() \
597   sljit_emit_label(compiler)
598 #define JUMP(type) \
599   sljit_emit_jump(compiler, (type))
600 #define JUMPTO(type, label) \
601   sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
602 #define JUMPHERE(jump) \
603   sljit_set_label((jump), sljit_emit_label(compiler))
604 #define SET_LABEL(jump, label) \
605   sljit_set_label((jump), (label))
606 #define CMP(type, src1, src1w, src2, src2w) \
607   sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
608 #define CMPTO(type, src1, src1w, src2, src2w, label) \
609   sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
610 #define OP_FLAGS(op, dst, dstw, type) \
611   sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
612 #define CMOV(type, dst_reg, src, srcw) \
613   sljit_emit_cmov(compiler, (type), (dst_reg), (src), (srcw))
614 #define GET_LOCAL_BASE(dst, dstw, offset) \
615   sljit_get_local_base(compiler, (dst), (dstw), (offset))
616 
617 #define READ_CHAR_MAX 0x7fffffff
618 
619 #define INVALID_UTF_CHAR 888
620 
/* Returns the address of the first opcode after the bracket that starts at
cc. cc must point to an assertion opcode (OP_ASSERT .. OP_ASSERTBACK_NOT) or
to a bracket opcode (OP_ONCE .. OP_SCOND); this is only checked by
SLJIT_ASSERT. */
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* Follow the link of the opening opcode, then the link of every OP_ALT
that the previous link leads to. */
for (;;)
  {
  cc += GET(cc, 1);
  if (*cc != OP_ALT)
    break;
  }

/* The chain has to finish on one of the closing KET opcodes; step over
that opcode and its link field. */
SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return cc + 1 + LINK_SIZE;
}
629 
/* Returns the number of alternatives of the bracket that starts at cc: one
for the first branch plus one for every OP_ALT reached through the link
chain. cc must point to an assertion or bracket opcode (checked only by
SLJIT_ASSERT). */
static int no_alternatives(PCRE2_SPTR cc)
{
int branches = 1;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* Skip the first branch, then count and skip each following OP_ALT. */
cc += GET(cc, 1);
while (*cc == OP_ALT)
  {
  branches++;
  cc += GET(cc, 1);
  }

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return branches;
}
643 
644 /* Functions which might need modification for every newly supported opcode:
645  next_opcode
646  check_opcode_types
647  set_private_data_ptrs
648  get_framesize
649  init_frame
650  get_recurse_data_length
651  copy_recurse_data
652  compile_matchingpath
653  compile_backtrackingpath
654 */
655 
/* Returns the address of the opcode that follows the one at cc.

Returns NULL for OP_ANYBYTE when common->utf is set (only when built with
SUPPORT_UNICODE) and for any opcode that has no case below; the latter is
also flagged with SLJIT_UNREACHABLE(). */
static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
const PCRE2_UCHAR opcode = *cc;

SLJIT_UNUSED_ARG(common);

switch(opcode)
  {
  /* The OP_lengths table gives the whole length of these opcodes. */
  case OP_SOD: case OP_SOM: case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT: case OP_DIGIT:
  case OP_NOT_WHITESPACE: case OP_WHITESPACE:
  case OP_NOT_WORDCHAR: case OP_WORDCHAR:
  case OP_ANY: case OP_ALLANY:
  case OP_NOTPROP: case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE: case OP_HSPACE:
  case OP_NOT_VSPACE: case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN: case OP_EOD:
  case OP_CIRC: case OP_CIRCM:
  case OP_DOLL: case OP_DOLLM:
  case OP_CRSTAR: case OP_CRMINSTAR:
  case OP_CRPLUS: case OP_CRMINPLUS:
  case OP_CRQUERY: case OP_CRMINQUERY:
  case OP_CRRANGE: case OP_CRMINRANGE:
  case OP_CRPOSSTAR: case OP_CRPOSPLUS:
  case OP_CRPOSQUERY: case OP_CRPOSRANGE:
  case OP_CLASS: case OP_NCLASS:
  case OP_REF: case OP_REFI:
  case OP_DNREF: case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS:
  case OP_REVERSE:
  case OP_ASSERT: case OP_ASSERT_NOT:
  case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
  case OP_ONCE:
  case OP_BRA: case OP_BRAPOS:
  case OP_CBRA: case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA: case OP_SBRAPOS:
  case OP_SCBRA: case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF: case OP_DNCREF:
  case OP_RREF: case OP_DNRREF:
  case OP_FALSE: case OP_TRUE:
  case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
  case OP_PRUNE: case OP_SKIP: case OP_THEN: case OP_COMMIT:
  case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  return cc + PRIV(OP_lengths)[opcode];

  /* Opcodes advanced by their OP_lengths entry; in UTF mode the extra
  length reported for the code unit just before the new position is
  skipped as well. */
  case OP_CHAR: case OP_CHARI:
  case OP_NOT: case OP_NOTI:
  case OP_STAR: case OP_MINSTAR:
  case OP_PLUS: case OP_MINPLUS:
  case OP_QUERY: case OP_MINQUERY:
  case OP_UPTO: case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR: case OP_POSPLUS:
  case OP_POSQUERY: case OP_POSUPTO:
  case OP_STARI: case OP_MINSTARI:
  case OP_PLUSI: case OP_MINPLUSI:
  case OP_QUERYI: case OP_MINQUERYI:
  case OP_UPTOI: case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI: case OP_POSPLUSI:
  case OP_POSQUERYI: case OP_POSUPTOI:
  case OP_NOTSTAR: case OP_NOTMINSTAR:
  case OP_NOTPLUS: case OP_NOTMINPLUS:
  case OP_NOTQUERY: case OP_NOTMINQUERY:
  case OP_NOTUPTO: case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR: case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
  case OP_NOTSTARI: case OP_NOTMINSTARI:
  case OP_NOTPLUSI: case OP_NOTMINPLUSI:
  case OP_NOTQUERYI: case OP_NOTMINQUERYI:
  case OP_NOTUPTOI: case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
  cc += PRIV(OP_lengths)[opcode];
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(cc[-1]))
    cc += GET_EXTRALEN(cc[-1]);
#endif
  return cc;

  /* Special cases. */

  /* Character type repeats advance by one less than the table length. */
  case OP_TYPESTAR: case OP_TYPEMINSTAR:
  case OP_TYPEPLUS: case OP_TYPEMINPLUS:
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[opcode] - 1;

  /* Rejected (NULL) in UTF mode, otherwise a single code unit. */
  case OP_ANYBYTE:
#ifdef SUPPORT_UNICODE
  if (common->utf)
    return NULL;
#endif
  return cc + 1;

  /* The length is read from the opcode itself. */
  case OP_CALLOUT_STR:
  return cc + GET(cc, 1 + 2*LINK_SIZE);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* Three fixed code units plus the count stored in cc[1]. */
  case OP_MARK:
  case OP_COMMIT_ARG:
  case OP_PRUNE_ARG:
  case OP_SKIP_ARG:
  case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  default:
  /* All opcodes are supported now! */
  SLJIT_UNREACHABLE();
  return NULL;
  }
}
854 
check_opcode_types(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend)855 static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
856 {
857 int count;
858 PCRE2_SPTR slot;
859 PCRE2_SPTR assert_back_end = cc - 1;
860 
861 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
862 while (cc < ccend)
863   {
864   switch(*cc)
865     {
866     case OP_SET_SOM:
867     common->has_set_som = TRUE;
868     common->might_be_empty = TRUE;
869     cc += 1;
870     break;
871 
872     case OP_REF:
873     case OP_REFI:
874     common->optimized_cbracket[GET2(cc, 1)] = 0;
875     cc += 1 + IMM2_SIZE;
876     break;
877 
878     case OP_CBRAPOS:
879     case OP_SCBRAPOS:
880     common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
881     cc += 1 + LINK_SIZE + IMM2_SIZE;
882     break;
883 
884     case OP_COND:
885     case OP_SCOND:
886     /* Only AUTO_CALLOUT can insert this opcode. We do
887        not intend to support this case. */
888     if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
889       return FALSE;
890     cc += 1 + LINK_SIZE;
891     break;
892 
893     case OP_CREF:
894     common->optimized_cbracket[GET2(cc, 1)] = 0;
895     cc += 1 + IMM2_SIZE;
896     break;
897 
898     case OP_DNREF:
899     case OP_DNREFI:
900     case OP_DNCREF:
901     count = GET2(cc, 1 + IMM2_SIZE);
902     slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
903     while (count-- > 0)
904       {
905       common->optimized_cbracket[GET2(slot, 0)] = 0;
906       slot += common->name_entry_size;
907       }
908     cc += 1 + 2 * IMM2_SIZE;
909     break;
910 
911     case OP_RECURSE:
912     /* Set its value only once. */
913     if (common->recursive_head_ptr == 0)
914       {
915       common->recursive_head_ptr = common->ovector_start;
916       common->ovector_start += sizeof(sljit_sw);
917       }
918     cc += 1 + LINK_SIZE;
919     break;
920 
921     case OP_CALLOUT:
922     case OP_CALLOUT_STR:
923     if (common->capture_last_ptr == 0)
924       {
925       common->capture_last_ptr = common->ovector_start;
926       common->ovector_start += sizeof(sljit_sw);
927       }
928     cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
929     break;
930 
931     case OP_ASSERTBACK:
932     slot = bracketend(cc);
933     if (slot > assert_back_end)
934       assert_back_end = slot;
935     cc += 1 + LINK_SIZE;
936     break;
937 
938     case OP_THEN_ARG:
939     common->has_then = TRUE;
940     common->control_head_ptr = 1;
941     /* Fall through. */
942 
943     case OP_COMMIT_ARG:
944     case OP_PRUNE_ARG:
945     case OP_MARK:
946     if (common->mark_ptr == 0)
947       {
948       common->mark_ptr = common->ovector_start;
949       common->ovector_start += sizeof(sljit_sw);
950       }
951     cc += 1 + 2 + cc[1];
952     break;
953 
954     case OP_THEN:
955     common->has_then = TRUE;
956     common->control_head_ptr = 1;
957     cc += 1;
958     break;
959 
960     case OP_SKIP:
961     if (cc < assert_back_end)
962       common->has_skip_in_assert_back = TRUE;
963     cc += 1;
964     break;
965 
966     case OP_SKIP_ARG:
967     common->control_head_ptr = 1;
968     common->has_skip_arg = TRUE;
969     if (cc < assert_back_end)
970       common->has_skip_in_assert_back = TRUE;
971     cc += 1 + 2 + cc[1];
972     break;
973 
974     default:
975     cc = next_opcode(common, cc);
976     if (cc == NULL)
977       return FALSE;
978     break;
979     }
980   }
981 return TRUE;
982 }
983 
is_accelerated_repeat(PCRE2_SPTR cc)984 static BOOL is_accelerated_repeat(PCRE2_SPTR cc)
985 {
986 switch(*cc)
987   {
988   case OP_TYPESTAR:
989   case OP_TYPEMINSTAR:
990   case OP_TYPEPLUS:
991   case OP_TYPEMINPLUS:
992   case OP_TYPEPOSSTAR:
993   case OP_TYPEPOSPLUS:
994   return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);
995 
996   case OP_STAR:
997   case OP_MINSTAR:
998   case OP_PLUS:
999   case OP_MINPLUS:
1000   case OP_POSSTAR:
1001   case OP_POSPLUS:
1002 
1003   case OP_STARI:
1004   case OP_MINSTARI:
1005   case OP_PLUSI:
1006   case OP_MINPLUSI:
1007   case OP_POSSTARI:
1008   case OP_POSPLUSI:
1009 
1010   case OP_NOTSTAR:
1011   case OP_NOTMINSTAR:
1012   case OP_NOTPLUS:
1013   case OP_NOTMINPLUS:
1014   case OP_NOTPOSSTAR:
1015   case OP_NOTPOSPLUS:
1016 
1017   case OP_NOTSTARI:
1018   case OP_NOTMINSTARI:
1019   case OP_NOTPLUSI:
1020   case OP_NOTMINPLUSI:
1021   case OP_NOTPOSSTARI:
1022   case OP_NOTPOSPLUSI:
1023   return TRUE;
1024 
1025   case OP_CLASS:
1026   case OP_NCLASS:
1027 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
1028   case OP_XCLASS:
1029   cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(PCRE2_UCHAR)));
1030 #else
1031   cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
1032 #endif
1033 
1034   switch(*cc)
1035     {
1036     case OP_CRSTAR:
1037     case OP_CRMINSTAR:
1038     case OP_CRPLUS:
1039     case OP_CRMINPLUS:
1040     case OP_CRPOSSTAR:
1041     case OP_CRPOSPLUS:
1042     return TRUE;
1043     }
1044   break;
1045   }
1046 return FALSE;
1047 }
1048 
detect_fast_forward_skip(compiler_common * common,int * private_data_start)1049 static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
1050 {
1051 PCRE2_SPTR cc = common->start;
1052 PCRE2_SPTR end;
1053 
1054 /* Skip not repeated brackets. */
1055 while (TRUE)
1056   {
1057   switch(*cc)
1058     {
1059     case OP_SOD:
1060     case OP_SOM:
1061     case OP_SET_SOM:
1062     case OP_NOT_WORD_BOUNDARY:
1063     case OP_WORD_BOUNDARY:
1064     case OP_EODN:
1065     case OP_EOD:
1066     case OP_CIRC:
1067     case OP_CIRCM:
1068     case OP_DOLL:
1069     case OP_DOLLM:
1070     /* Zero width assertions. */
1071     cc++;
1072     continue;
1073     }
1074 
1075   if (*cc != OP_BRA && *cc != OP_CBRA)
1076     break;
1077 
1078   end = cc + GET(cc, 1);
1079   if (*end != OP_KET || PRIVATE_DATA(end) != 0)
1080     return FALSE;
1081   if (*cc == OP_CBRA)
1082     {
1083     if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1084       return FALSE;
1085     cc += IMM2_SIZE;
1086     }
1087   cc += 1 + LINK_SIZE;
1088   }
1089 
1090 if (is_accelerated_repeat(cc))
1091   {
1092   common->fast_forward_bc_ptr = cc;
1093   common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1094   *private_data_start += sizeof(sljit_sw);
1095   return TRUE;
1096   }
1097 return FALSE;
1098 }
1099 
detect_fast_fail(compiler_common * common,PCRE2_SPTR cc,int * private_data_start,sljit_s32 depth)1100 static SLJIT_INLINE void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth)
1101 {
1102   PCRE2_SPTR next_alt;
1103 
1104   SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
1105 
1106   if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1107     return;
1108 
1109   next_alt = bracketend(cc) - (1 + LINK_SIZE);
1110   if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
1111     return;
1112 
1113   do
1114     {
1115     next_alt = cc + GET(cc, 1);
1116 
1117     cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1118 
1119     while (TRUE)
1120       {
1121       switch(*cc)
1122         {
1123         case OP_SOD:
1124         case OP_SOM:
1125         case OP_SET_SOM:
1126         case OP_NOT_WORD_BOUNDARY:
1127         case OP_WORD_BOUNDARY:
1128         case OP_EODN:
1129         case OP_EOD:
1130         case OP_CIRC:
1131         case OP_CIRCM:
1132         case OP_DOLL:
1133         case OP_DOLLM:
1134         /* Zero width assertions. */
1135         cc++;
1136         continue;
1137         }
1138       break;
1139       }
1140 
1141     if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
1142       detect_fast_fail(common, cc, private_data_start, depth - 1);
1143 
1144     if (is_accelerated_repeat(cc))
1145       {
1146       common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1147 
1148       if (common->fast_fail_start_ptr == 0)
1149         common->fast_fail_start_ptr = *private_data_start;
1150 
1151       *private_data_start += sizeof(sljit_sw);
1152       common->fast_fail_end_ptr = *private_data_start;
1153 
1154       if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1155         return;
1156       }
1157 
1158     cc = next_alt;
1159     }
1160   while (*cc == OP_ALT);
1161 }
1162 
/* Returns the number of private data words associated with the class
repeat opcode at cc (the opcode which follows the class data).

Arguments:
  cc          points to the opcode after a character class

Returns:      0, 1 or 2; 0 is returned for any opcode not listed below
*/
static int get_class_iterator_size(PCRE2_SPTR cc)
{
sljit_u32 lower;
sljit_u32 upper;
sljit_u32 optional;

switch(*cc)
  {
  case OP_CRSTAR:
  case OP_CRPLUS:
  return 2;

  case OP_CRMINSTAR:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  return 1;

  case OP_CRRANGE:
  case OP_CRMINRANGE:
  lower = GET2(cc, 1);
  upper = GET2(cc, 1 + IMM2_SIZE);
  /* A zero upper limit is treated like the star forms above. */
  if (upper == 0)
    return (*cc == OP_CRRANGE) ? 2 : 1;
  /* Otherwise the result is the number of optional iterations, capped at 2. */
  optional = upper - lower;
  return (optional > 2) ? 2 : optional;

  default:
  return 0;
  }
}
1194 
detect_repeat(compiler_common * common,PCRE2_SPTR begin)1195 static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
1196 {
1197 PCRE2_SPTR end = bracketend(begin);
1198 PCRE2_SPTR next;
1199 PCRE2_SPTR next_end;
1200 PCRE2_SPTR max_end;
1201 PCRE2_UCHAR type;
1202 sljit_sw length = end - begin;
1203 sljit_s32 min, max, i;
1204 
1205 /* Detect fixed iterations first. */
1206 if (end[-(1 + LINK_SIZE)] != OP_KET)
1207   return FALSE;
1208 
1209 /* Already detected repeat. */
1210 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
1211   return TRUE;
1212 
1213 next = end;
1214 min = 1;
1215 while (1)
1216   {
1217   if (*next != *begin)
1218     break;
1219   next_end = bracketend(next);
1220   if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1221     break;
1222   next = next_end;
1223   min++;
1224   }
1225 
1226 if (min == 2)
1227   return FALSE;
1228 
1229 max = 0;
1230 max_end = next;
1231 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1232   {
1233   type = *next;
1234   while (1)
1235     {
1236     if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1237       break;
1238     next_end = bracketend(next + 2 + LINK_SIZE);
1239     if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1240       break;
1241     next = next_end;
1242     max++;
1243     }
1244 
1245   if (next[0] == type && next[1] == *begin && max >= 1)
1246     {
1247     next_end = bracketend(next + 1);
1248     if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1249       {
1250       for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1251         if (*next_end != OP_KET)
1252           break;
1253 
1254       if (i == max)
1255         {
1256         common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1257         common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1258         /* +2 the original and the last. */
1259         common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1260         if (min == 1)
1261           return TRUE;
1262         min--;
1263         max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1264         }
1265       }
1266     }
1267   }
1268 
1269 if (min >= 3)
1270   {
1271   common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1272   common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1273   common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1274   return TRUE;
1275   }
1276 
1277 return FALSE;
1278 }
1279 
/* Case label groups for the character iterator opcodes. They are expanded
inside switch statements (see set_private_data_ptrs() and
get_recurse_data_length()), so that iterators which need the same number of
private data words are handled by the same code. */

/* Single character iterators: one private data word; the opcode is followed
by the character. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

/* Single character iterators: two private data words; the opcode is followed
by the character. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

/* Single character iterators: two private data words; the opcode is followed
by an IMM2_SIZE count and then by the character. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* Character type iterators: one private data word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Character type iterators: two private data words (set_private_data_ptrs()
reserves none when the repeated type is OP_ANYNL or OP_EXTUNI). */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* Character type iterators with an IMM2_SIZE count after the opcode: two
private data words. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1331 
/* Walks the byte code from common->start up to ccend and assigns private
data offsets (stored in common->private_data_ptrs, indexed by the position of
the opcode) to the brackets, detected bracket repeats and character iterators
which keep their state in private data. The walk stops early when the
assigned area grows beyond SLJIT_MAX_LOCAL_SIZE.

Arguments:
  common              compiler state; common->private_data_ptrs is updated
  private_data_start  in: first free private data offset
                      out: first free offset after the assignments
  ccend               end of the byte code range to process
*/
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
PCRE2_SPTR cc = common->start;
PCRE2_SPTR alternative;
/* End of the enclosing repeated bracket. Character iterators which are
before this position do not get a private slot. */
PCRE2_SPTR end = NULL;
int private_data_ptr = *private_data_start;
/* space: number of private data words wanted by a character iterator.
size: number of code units to step over; a negative value means that the
opcode ends with a character which may have extra code units in UTF mode.
bracketlen: length of a bracket header to step over. */
int space, size, bracketlen;
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  space = 0;
  size = 0;
  bracketlen = 0;
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
    {
    if (detect_repeat(common, cc))
      {
      /* These brackets are converted to repeats, so no global
      based single character repeat is allowed. */
      if (cc >= end)
        end = bracketend(cc);
      }
    }
  /* The repeat check is skipped only for the bracket which directly
  follows an OP_BRAZERO, OP_BRAMINZERO or OP_BRAPOSZERO. */
  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry after the OP_KET was set by detect_repeat(): the
    OP_KET gets a private word, and the copies of the bracket are skipped. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    repeat_check = FALSE;
    size = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    space = 1;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    space = 2;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    space = 2;
    size = -(2 + IMM2_SIZE);
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    space = 1;
    size = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    size = 1;
    break;

    case OP_TYPEUPTO:
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    size = 1 + IMM2_SIZE;
    break;

    case OP_TYPEMINUPTO:
    space = 2;
    size = 1 + IMM2_SIZE;
    break;

    /* For classes the repeat opcode follows the class data, so the length
    of the class must be known before get_class_iterator_size() is called.
    (Calling it with the class opcode itself always yields zero.) */
    case OP_CLASS:
    case OP_NCLASS:
    size = 1 + 32 / sizeof(PCRE2_UCHAR);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
     gets a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  if (size != 0)
    {
    if (size < 0)
      {
      cc += -size;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    if (cc >= end)
      {
      /* Remember the end of this bracket unless it is closed by a plain
      OP_KET. */
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
*private_data_start = private_data_ptr;
}
1511 
1512 /* Returns with a frame_types (always < 0) if no need for frame. */
get_framesize(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,BOOL recursive,BOOL * needs_control_head)1513 static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
1514 {
1515 int length = 0;
1516 int possessive = 0;
1517 BOOL stack_restore = FALSE;
1518 BOOL setsom_found = recursive;
1519 BOOL setmark_found = recursive;
1520 /* The last capture is a local variable even for recursions. */
1521 BOOL capture_last_found = FALSE;
1522 
1523 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1524 SLJIT_ASSERT(common->control_head_ptr != 0);
1525 *needs_control_head = TRUE;
1526 #else
1527 *needs_control_head = FALSE;
1528 #endif
1529 
1530 if (ccend == NULL)
1531   {
1532   ccend = bracketend(cc) - (1 + LINK_SIZE);
1533   if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1534     {
1535     possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1536     /* This is correct regardless of common->capture_last_ptr. */
1537     capture_last_found = TRUE;
1538     }
1539   cc = next_opcode(common, cc);
1540   }
1541 
1542 SLJIT_ASSERT(cc != NULL);
1543 while (cc < ccend)
1544   switch(*cc)
1545     {
1546     case OP_SET_SOM:
1547     SLJIT_ASSERT(common->has_set_som);
1548     stack_restore = TRUE;
1549     if (!setsom_found)
1550       {
1551       length += 2;
1552       setsom_found = TRUE;
1553       }
1554     cc += 1;
1555     break;
1556 
1557     case OP_MARK:
1558     case OP_COMMIT_ARG:
1559     case OP_PRUNE_ARG:
1560     case OP_THEN_ARG:
1561     SLJIT_ASSERT(common->mark_ptr != 0);
1562     stack_restore = TRUE;
1563     if (!setmark_found)
1564       {
1565       length += 2;
1566       setmark_found = TRUE;
1567       }
1568     if (common->control_head_ptr != 0)
1569       *needs_control_head = TRUE;
1570     cc += 1 + 2 + cc[1];
1571     break;
1572 
1573     case OP_RECURSE:
1574     stack_restore = TRUE;
1575     if (common->has_set_som && !setsom_found)
1576       {
1577       length += 2;
1578       setsom_found = TRUE;
1579       }
1580     if (common->mark_ptr != 0 && !setmark_found)
1581       {
1582       length += 2;
1583       setmark_found = TRUE;
1584       }
1585     if (common->capture_last_ptr != 0 && !capture_last_found)
1586       {
1587       length += 2;
1588       capture_last_found = TRUE;
1589       }
1590     cc += 1 + LINK_SIZE;
1591     break;
1592 
1593     case OP_CBRA:
1594     case OP_CBRAPOS:
1595     case OP_SCBRA:
1596     case OP_SCBRAPOS:
1597     stack_restore = TRUE;
1598     if (common->capture_last_ptr != 0 && !capture_last_found)
1599       {
1600       length += 2;
1601       capture_last_found = TRUE;
1602       }
1603     length += 3;
1604     cc += 1 + LINK_SIZE + IMM2_SIZE;
1605     break;
1606 
1607     case OP_THEN:
1608     stack_restore = TRUE;
1609     if (common->control_head_ptr != 0)
1610       *needs_control_head = TRUE;
1611     cc ++;
1612     break;
1613 
1614     default:
1615     stack_restore = TRUE;
1616     /* Fall through. */
1617 
1618     case OP_NOT_WORD_BOUNDARY:
1619     case OP_WORD_BOUNDARY:
1620     case OP_NOT_DIGIT:
1621     case OP_DIGIT:
1622     case OP_NOT_WHITESPACE:
1623     case OP_WHITESPACE:
1624     case OP_NOT_WORDCHAR:
1625     case OP_WORDCHAR:
1626     case OP_ANY:
1627     case OP_ALLANY:
1628     case OP_ANYBYTE:
1629     case OP_NOTPROP:
1630     case OP_PROP:
1631     case OP_ANYNL:
1632     case OP_NOT_HSPACE:
1633     case OP_HSPACE:
1634     case OP_NOT_VSPACE:
1635     case OP_VSPACE:
1636     case OP_EXTUNI:
1637     case OP_EODN:
1638     case OP_EOD:
1639     case OP_CIRC:
1640     case OP_CIRCM:
1641     case OP_DOLL:
1642     case OP_DOLLM:
1643     case OP_CHAR:
1644     case OP_CHARI:
1645     case OP_NOT:
1646     case OP_NOTI:
1647 
1648     case OP_EXACT:
1649     case OP_POSSTAR:
1650     case OP_POSPLUS:
1651     case OP_POSQUERY:
1652     case OP_POSUPTO:
1653 
1654     case OP_EXACTI:
1655     case OP_POSSTARI:
1656     case OP_POSPLUSI:
1657     case OP_POSQUERYI:
1658     case OP_POSUPTOI:
1659 
1660     case OP_NOTEXACT:
1661     case OP_NOTPOSSTAR:
1662     case OP_NOTPOSPLUS:
1663     case OP_NOTPOSQUERY:
1664     case OP_NOTPOSUPTO:
1665 
1666     case OP_NOTEXACTI:
1667     case OP_NOTPOSSTARI:
1668     case OP_NOTPOSPLUSI:
1669     case OP_NOTPOSQUERYI:
1670     case OP_NOTPOSUPTOI:
1671 
1672     case OP_TYPEEXACT:
1673     case OP_TYPEPOSSTAR:
1674     case OP_TYPEPOSPLUS:
1675     case OP_TYPEPOSQUERY:
1676     case OP_TYPEPOSUPTO:
1677 
1678     case OP_CLASS:
1679     case OP_NCLASS:
1680     case OP_XCLASS:
1681 
1682     case OP_CALLOUT:
1683     case OP_CALLOUT_STR:
1684 
1685     cc = next_opcode(common, cc);
1686     SLJIT_ASSERT(cc != NULL);
1687     break;
1688     }
1689 
1690 /* Possessive quantifiers can use a special case. */
1691 if (SLJIT_UNLIKELY(possessive == length))
1692   return stack_restore ? no_frame : no_stack;
1693 
1694 if (length > 0)
1695   return length + 1;
1696 return stack_restore ? no_frame : no_stack;
1697 }
1698 
init_frame(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,int stackpos,int stacktop)1699 static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
1700 {
1701 DEFINE_COMPILER;
1702 BOOL setsom_found = FALSE;
1703 BOOL setmark_found = FALSE;
1704 /* The last capture is a local variable even for recursions. */
1705 BOOL capture_last_found = FALSE;
1706 int offset;
1707 
1708 /* >= 1 + shortest item size (2) */
1709 SLJIT_UNUSED_ARG(stacktop);
1710 SLJIT_ASSERT(stackpos >= stacktop + 2);
1711 
1712 stackpos = STACK(stackpos);
1713 if (ccend == NULL)
1714   {
1715   ccend = bracketend(cc) - (1 + LINK_SIZE);
1716   if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
1717     cc = next_opcode(common, cc);
1718   }
1719 
1720 SLJIT_ASSERT(cc != NULL);
1721 while (cc < ccend)
1722   switch(*cc)
1723     {
1724     case OP_SET_SOM:
1725     SLJIT_ASSERT(common->has_set_som);
1726     if (!setsom_found)
1727       {
1728       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1729       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1730       stackpos -= (int)sizeof(sljit_sw);
1731       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1732       stackpos -= (int)sizeof(sljit_sw);
1733       setsom_found = TRUE;
1734       }
1735     cc += 1;
1736     break;
1737 
1738     case OP_MARK:
1739     case OP_COMMIT_ARG:
1740     case OP_PRUNE_ARG:
1741     case OP_THEN_ARG:
1742     SLJIT_ASSERT(common->mark_ptr != 0);
1743     if (!setmark_found)
1744       {
1745       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1746       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1747       stackpos -= (int)sizeof(sljit_sw);
1748       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1749       stackpos -= (int)sizeof(sljit_sw);
1750       setmark_found = TRUE;
1751       }
1752     cc += 1 + 2 + cc[1];
1753     break;
1754 
1755     case OP_RECURSE:
1756     if (common->has_set_som && !setsom_found)
1757       {
1758       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1759       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1760       stackpos -= (int)sizeof(sljit_sw);
1761       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1762       stackpos -= (int)sizeof(sljit_sw);
1763       setsom_found = TRUE;
1764       }
1765     if (common->mark_ptr != 0 && !setmark_found)
1766       {
1767       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1768       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1769       stackpos -= (int)sizeof(sljit_sw);
1770       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1771       stackpos -= (int)sizeof(sljit_sw);
1772       setmark_found = TRUE;
1773       }
1774     if (common->capture_last_ptr != 0 && !capture_last_found)
1775       {
1776       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1777       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1778       stackpos -= (int)sizeof(sljit_sw);
1779       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1780       stackpos -= (int)sizeof(sljit_sw);
1781       capture_last_found = TRUE;
1782       }
1783     cc += 1 + LINK_SIZE;
1784     break;
1785 
1786     case OP_CBRA:
1787     case OP_CBRAPOS:
1788     case OP_SCBRA:
1789     case OP_SCBRAPOS:
1790     if (common->capture_last_ptr != 0 && !capture_last_found)
1791       {
1792       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1793       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1794       stackpos -= (int)sizeof(sljit_sw);
1795       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1796       stackpos -= (int)sizeof(sljit_sw);
1797       capture_last_found = TRUE;
1798       }
1799     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1800     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1801     stackpos -= (int)sizeof(sljit_sw);
1802     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1803     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1804     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1805     stackpos -= (int)sizeof(sljit_sw);
1806     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1807     stackpos -= (int)sizeof(sljit_sw);
1808 
1809     cc += 1 + LINK_SIZE + IMM2_SIZE;
1810     break;
1811 
1812     default:
1813     cc = next_opcode(common, cc);
1814     SLJIT_ASSERT(cc != NULL);
1815     break;
1816     }
1817 
1818 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1819 SLJIT_ASSERT(stackpos == STACK(stacktop));
1820 }
1821 
/* Number of temporary registers used in rotation by the delayed memory
copy helpers below. */
#define RECURSE_TMP_REG_COUNT 3

/* State of a sequence of delayed memory copies. Each copy is emitted as a
load into a temporary register; the matching store is postponed until the
same register is needed again, or until delayed_mem_copy_finish() is called. */
typedef struct delayed_mem_copy_status {
  /* Compiler used for emitting the instructions. */
  struct sljit_compiler *compiler;
  /* Base register of the pending store of each slot, -1 if nothing is pending. */
  int store_bases[RECURSE_TMP_REG_COUNT];
  /* Offset of the pending store of each slot. */
  int store_offsets[RECURSE_TMP_REG_COUNT];
  /* Temporary registers; must be set by the caller before delayed_mem_copy_init(). */
  int tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Where the original value of each temporary register is kept; must be set
  by the caller. A value is moved there (and restored from there) only when
  sljit_get_register_index() is negative for this register. */
  int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Index of the slot used by the next copy. */
  int next_tmp_reg;
} delayed_mem_copy_status;
1832 
/* Prepares a delayed memory copy sequence: no store is pending and the
first temporary register is used next. The tmp_regs and saved_tmp_regs
members of status must be filled in by the caller before the call.

Arguments:
  status      state to initialize
  common      compiler state (provides the sljit compiler)
*/
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
int slot = 0;

while (slot < RECURSE_TMP_REG_COUNT)
  {
  SLJIT_ASSERT(status->tmp_regs[slot] >= 0);
  SLJIT_ASSERT(sljit_get_register_index(status->saved_tmp_regs[slot]) < 0 || status->tmp_regs[slot] == status->saved_tmp_regs[slot]);

  /* -1 marks a slot without pending store. */
  status->store_bases[slot] = -1;
  slot++;
  }

status->compiler = common->compiler;
status->next_tmp_reg = 0;
}
1847 
/* Emits the load of a memory to memory copy and postpones its store. The
next temporary register of the rotation is used: its pending store (if any)
is emitted first, then the new value is loaded and the destination is
remembered.

Arguments:
  status        delayed copy state
  load_base     base register of the source
  load_offset   offset of the source
  store_base    base register of the destination
  store_offset  offset of the destination
*/
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_sw store_offset)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int reg = status->tmp_regs[slot];

SLJIT_ASSERT(load_base > 0 && store_base > 0);

if (status->store_bases[slot] != -1)
  {
  /* The register still holds a value which has not been stored yet. */
  OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], reg, 0);
  }
else if (sljit_get_register_index(status->saved_tmp_regs[slot]) < 0)
  {
  /* First use of the register: preserve virtual registers. */
  OP1(SLJIT_MOV, status->saved_tmp_regs[slot], 0, reg, 0);
  }

OP1(SLJIT_MOV, reg, 0, SLJIT_MEM1(load_base), load_offset);
status->store_offsets[slot] = store_offset;
status->store_bases[slot] = store_base;

status->next_tmp_reg = (slot + 1) % RECURSE_TMP_REG_COUNT;
}
1872 
/* Emits all the pending stores of a delayed memory copy sequence, starting
with the slot which would be reused next, and restores the temporary
registers whose original value was preserved.

Arguments:
  status      delayed copy state
*/
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int remaining;
int reg, saved_reg;

for (remaining = RECURSE_TMP_REG_COUNT; remaining > 0; remaining--, slot = (slot + 1) % RECURSE_TMP_REG_COUNT)
  {
  /* Nothing is pending in this slot. */
  if (status->store_bases[slot] == -1)
    continue;

  reg = status->tmp_regs[slot];
  saved_reg = status->saved_tmp_regs[slot];

  OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], reg, 0);

  /* Restore virtual registers. */
  if (sljit_get_register_index(saved_reg) < 0)
    OP1(SLJIT_MOV, reg, 0, saved_reg, 0);
  }
}
1896 
1897 #undef RECURSE_TMP_REG_COUNT
1898 
/* Computes the number of machine words which describe the state of the byte
code range [cc, ccend) for a recursion: one initial word, the private data
words of the opcodes in the range, and extra words for the control head, the
last capture, and (only when a quit type verb is present) the start of match
and the mark.

Arguments:
  common              compiler state
  cc                  start of the byte code range
  ccend               end of the byte code range
  needs_control_head  out: TRUE if a control head word is included
  has_quit            out: TRUE if a (*PRUNE), (*SKIP), (*COMMIT) or (*THEN)
                      type opcode is in the range
  has_accept          out: TRUE if OP_ACCEPT or OP_ASSERT_ACCEPT is in the range

Returns:      the number of words
*/
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
  BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept)
{
int length = 1;
int class_size;
PCRE2_SPTR alternative;
BOOL quit_found = FALSE;
BOOL accept_found = FALSE;
BOOL setsom_found = FALSE;
BOOL setmark_found = FALSE;
BOOL capture_last_found = FALSE;
BOOL control_head_found = FALSE;

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
control_head_found = TRUE;
#endif

/* Calculate the sum of the private machine words. */
while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    setsom_found = TRUE;
    cc += 1;
    break;

    case OP_RECURSE:
    /* A nested recursion may touch any of the tracked locals. */
    if (common->has_set_som)
      setsom_found = TRUE;
    if (common->mark_ptr != 0)
      setmark_found = TRUE;
    if (common->capture_last_ptr != 0)
      capture_last_found = TRUE;
    cc += 1 + LINK_SIZE;
    break;

    case OP_KET:
    /* An OP_KET with private data belongs to a detected bracket repeat:
    the entry after it holds the distance to skip. */
    if (PRIVATE_DATA(cc) != 0)
      {
      length++;
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
    length++;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    /* Two words, plus one more when the bracket is not optimized. */
    length += (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) ? 3 : 2;
    if (common->capture_last_ptr != 0)
      capture_last_found = TRUE;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    length += 2 + 2;
    if (common->capture_last_ptr != 0)
      capture_last_found = TRUE;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      length++;
    cc += 1 + LINK_SIZE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    length += (PRIVATE_DATA(cc) != 0) ? 1 : 0;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    length += (PRIVATE_DATA(cc) != 0) ? 2 : 0;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    length += (PRIVATE_DATA(cc) != 0) ? 2 : 0;
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    length += (PRIVATE_DATA(cc) != 0) ? 1 : 0;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    length += (PRIVATE_DATA(cc) != 0) ? 2 : 0;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    length += (PRIVATE_DATA(cc) != 0) ? 2 : 0;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    class_size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
    class_size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif
    /* The repeat opcode which follows the class gives the word count. */
    if (PRIVATE_DATA(cc) != 0)
      length += get_class_iterator_size(cc + class_size);
    cc += class_size;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    setmark_found = TRUE;
    if (common->control_head_ptr != 0)
      control_head_found = TRUE;
    /* All of these except OP_MARK are quit type verbs. */
    if (*cc != OP_MARK)
      quit_found = TRUE;

    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_SKIP:
    case OP_COMMIT:
    quit_found = TRUE;
    cc++;
    break;

    case OP_SKIP_ARG:
    quit_found = TRUE;
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    quit_found = TRUE;
    control_head_found = TRUE;
    cc++;
    break;

    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    accept_found = TRUE;
    cc++;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }
SLJIT_ASSERT(cc == ccend);

*needs_control_head = control_head_found;
*has_quit = quit_found;
*has_accept = accept_found;

if (control_head_found)
  length++;
if (capture_last_found)
  length++;
/* The start of match and the mark are counted only when a quit type verb
is present. */
if (quit_found && setsom_found)
  length++;
if (quit_found && setmark_found)
  length++;

return length;
}
2113 
/* Operation selector for copy_recurse_data(). The function handles three
groups of values (private, shared and kept shared); the selector decides
which groups are moved. The direction descriptions below follow the
enumerator names - confirm against delayed_mem_copy_move(). */
enum copy_recurse_data_types {
  /* All three groups; the only mode in which the local frame is the source. */
  recurse_copy_from_global = 0,
  /* Private group only. */
  recurse_copy_private_to_global = 1,
  /* Shared and kept shared groups. */
  recurse_copy_shared_to_global = 2,
  /* Kept shared group only. */
  recurse_copy_kept_shared_to_global = 3,
  /* Private and shared groups, moved in both directions; the save area is
  addressed through TMP2 instead of STACK_TOP. */
  recurse_swap_global = 4
};
2121 
copy_recurse_data(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,int type,int stackptr,int stacktop,BOOL has_quit)2122 static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
2123   int type, int stackptr, int stacktop, BOOL has_quit)
2124 {
2125 delayed_mem_copy_status status;
2126 PCRE2_SPTR alternative;
2127 sljit_sw private_srcw[2];
2128 sljit_sw shared_srcw[3];
2129 sljit_sw kept_shared_srcw[2];
2130 int private_count, shared_count, kept_shared_count;
2131 int from_sp, base_reg, offset, i;
2132 BOOL setsom_found = FALSE;
2133 BOOL setmark_found = FALSE;
2134 BOOL capture_last_found = FALSE;
2135 BOOL control_head_found = FALSE;
2136 
2137 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2138 SLJIT_ASSERT(common->control_head_ptr != 0);
2139 control_head_found = TRUE;
2140 #endif
2141 
2142 switch (type)
2143   {
2144   case recurse_copy_from_global:
2145   from_sp = TRUE;
2146   base_reg = STACK_TOP;
2147   break;
2148 
2149   case recurse_copy_private_to_global:
2150   case recurse_copy_shared_to_global:
2151   case recurse_copy_kept_shared_to_global:
2152   from_sp = FALSE;
2153   base_reg = STACK_TOP;
2154   break;
2155 
2156   default:
2157   SLJIT_ASSERT(type == recurse_swap_global);
2158   from_sp = FALSE;
2159   base_reg = TMP2;
2160   break;
2161   }
2162 
2163 stackptr = STACK(stackptr);
2164 stacktop = STACK(stacktop);
2165 
2166 status.tmp_regs[0] = TMP1;
2167 status.saved_tmp_regs[0] = TMP1;
2168 
2169 if (base_reg != TMP2)
2170   {
2171   status.tmp_regs[1] = TMP2;
2172   status.saved_tmp_regs[1] = TMP2;
2173   }
2174 else
2175   {
2176   status.saved_tmp_regs[1] = RETURN_ADDR;
2177   if (sljit_get_register_index (RETURN_ADDR) == -1)
2178     status.tmp_regs[1] = STR_PTR;
2179   else
2180     status.tmp_regs[1] = RETURN_ADDR;
2181   }
2182 
2183 status.saved_tmp_regs[2] = TMP3;
2184 if (sljit_get_register_index (TMP3) == -1)
2185   status.tmp_regs[2] = STR_END;
2186 else
2187   status.tmp_regs[2] = TMP3;
2188 
2189 delayed_mem_copy_init(&status, common);
2190 
2191 if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2192   {
2193   SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2194 
2195   if (!from_sp)
2196     delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);
2197 
2198   if (from_sp || type == recurse_swap_global)
2199     delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
2200   }
2201 
2202 stackptr += sizeof(sljit_sw);
2203 
2204 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2205 if (type != recurse_copy_shared_to_global)
2206   {
2207   if (!from_sp)
2208     delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);
2209 
2210   if (from_sp || type == recurse_swap_global)
2211     delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
2212   }
2213 
2214 stackptr += sizeof(sljit_sw);
2215 #endif
2216 
2217 while (cc < ccend)
2218   {
2219   private_count = 0;
2220   shared_count = 0;
2221   kept_shared_count = 0;
2222 
2223   switch(*cc)
2224     {
2225     case OP_SET_SOM:
2226     SLJIT_ASSERT(common->has_set_som);
2227     if (has_quit && !setsom_found)
2228       {
2229       kept_shared_srcw[0] = OVECTOR(0);
2230       kept_shared_count = 1;
2231       setsom_found = TRUE;
2232       }
2233     cc += 1;
2234     break;
2235 
2236     case OP_RECURSE:
2237     if (has_quit)
2238       {
2239       if (common->has_set_som && !setsom_found)
2240         {
2241         kept_shared_srcw[0] = OVECTOR(0);
2242         kept_shared_count = 1;
2243         setsom_found = TRUE;
2244         }
2245       if (common->mark_ptr != 0 && !setmark_found)
2246         {
2247         kept_shared_srcw[kept_shared_count] = common->mark_ptr;
2248         kept_shared_count++;
2249         setmark_found = TRUE;
2250         }
2251       }
2252     if (common->capture_last_ptr != 0 && !capture_last_found)
2253       {
2254       shared_srcw[0] = common->capture_last_ptr;
2255       shared_count = 1;
2256       capture_last_found = TRUE;
2257       }
2258     cc += 1 + LINK_SIZE;
2259     break;
2260 
2261     case OP_KET:
2262     if (PRIVATE_DATA(cc) != 0)
2263       {
2264       private_count = 1;
2265       private_srcw[0] = PRIVATE_DATA(cc);
2266       SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2267       cc += PRIVATE_DATA(cc + 1);
2268       }
2269     cc += 1 + LINK_SIZE;
2270     break;
2271 
2272     case OP_ASSERT:
2273     case OP_ASSERT_NOT:
2274     case OP_ASSERTBACK:
2275     case OP_ASSERTBACK_NOT:
2276     case OP_ONCE:
2277     case OP_BRAPOS:
2278     case OP_SBRA:
2279     case OP_SBRAPOS:
2280     case OP_SCOND:
2281     private_count = 1;
2282     private_srcw[0] = PRIVATE_DATA(cc);
2283     cc += 1 + LINK_SIZE;
2284     break;
2285 
2286     case OP_CBRA:
2287     case OP_SCBRA:
2288     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2289     shared_srcw[0] = OVECTOR(offset);
2290     shared_srcw[1] = OVECTOR(offset + 1);
2291     shared_count = 2;
2292 
2293     if (common->capture_last_ptr != 0 && !capture_last_found)
2294       {
2295       shared_srcw[2] = common->capture_last_ptr;
2296       shared_count = 3;
2297       capture_last_found = TRUE;
2298       }
2299 
2300     if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
2301       {
2302       private_count = 1;
2303       private_srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
2304       }
2305     cc += 1 + LINK_SIZE + IMM2_SIZE;
2306     break;
2307 
2308     case OP_CBRAPOS:
2309     case OP_SCBRAPOS:
2310     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2311     shared_srcw[0] = OVECTOR(offset);
2312     shared_srcw[1] = OVECTOR(offset + 1);
2313     shared_count = 2;
2314 
2315     if (common->capture_last_ptr != 0 && !capture_last_found)
2316       {
2317       shared_srcw[2] = common->capture_last_ptr;
2318       shared_count = 3;
2319       capture_last_found = TRUE;
2320       }
2321 
2322     private_count = 2;
2323     private_srcw[0] = PRIVATE_DATA(cc);
2324     private_srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
2325     cc += 1 + LINK_SIZE + IMM2_SIZE;
2326     break;
2327 
2328     case OP_COND:
2329     /* Might be a hidden SCOND. */
2330     alternative = cc + GET(cc, 1);
2331     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2332       {
2333       private_count = 1;
2334       private_srcw[0] = PRIVATE_DATA(cc);
2335       }
2336     cc += 1 + LINK_SIZE;
2337     break;
2338 
2339     CASE_ITERATOR_PRIVATE_DATA_1
2340     if (PRIVATE_DATA(cc))
2341       {
2342       private_count = 1;
2343       private_srcw[0] = PRIVATE_DATA(cc);
2344       }
2345     cc += 2;
2346 #ifdef SUPPORT_UNICODE
2347     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2348 #endif
2349     break;
2350 
2351     CASE_ITERATOR_PRIVATE_DATA_2A
2352     if (PRIVATE_DATA(cc))
2353       {
2354       private_count = 2;
2355       private_srcw[0] = PRIVATE_DATA(cc);
2356       private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2357       }
2358     cc += 2;
2359 #ifdef SUPPORT_UNICODE
2360     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2361 #endif
2362     break;
2363 
2364     CASE_ITERATOR_PRIVATE_DATA_2B
2365     if (PRIVATE_DATA(cc))
2366       {
2367       private_count = 2;
2368       private_srcw[0] = PRIVATE_DATA(cc);
2369       private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2370       }
2371     cc += 2 + IMM2_SIZE;
2372 #ifdef SUPPORT_UNICODE
2373     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2374 #endif
2375     break;
2376 
2377     CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2378     if (PRIVATE_DATA(cc))
2379       {
2380       private_count = 1;
2381       private_srcw[0] = PRIVATE_DATA(cc);
2382       }
2383     cc += 1;
2384     break;
2385 
2386     CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2387     if (PRIVATE_DATA(cc))
2388       {
2389       private_count = 2;
2390       private_srcw[0] = PRIVATE_DATA(cc);
2391       private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2392       }
2393     cc += 1;
2394     break;
2395 
2396     CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2397     if (PRIVATE_DATA(cc))
2398       {
2399       private_count = 2;
2400       private_srcw[0] = PRIVATE_DATA(cc);
2401       private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2402       }
2403     cc += 1 + IMM2_SIZE;
2404     break;
2405 
2406     case OP_CLASS:
2407     case OP_NCLASS:
2408 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
2409     case OP_XCLASS:
2410     i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2411 #else
2412     i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2413 #endif
2414     if (PRIVATE_DATA(cc) != 0)
2415       switch(get_class_iterator_size(cc + i))
2416         {
2417         case 1:
2418         private_count = 1;
2419         private_srcw[0] = PRIVATE_DATA(cc);
2420         break;
2421 
2422         case 2:
2423         private_count = 2;
2424         private_srcw[0] = PRIVATE_DATA(cc);
2425         private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2426         break;
2427 
2428         default:
2429         SLJIT_UNREACHABLE();
2430         break;
2431         }
2432     cc += i;
2433     break;
2434 
2435     case OP_MARK:
2436     case OP_COMMIT_ARG:
2437     case OP_PRUNE_ARG:
2438     case OP_THEN_ARG:
2439     SLJIT_ASSERT(common->mark_ptr != 0);
2440     if (has_quit && !setmark_found)
2441       {
2442       kept_shared_srcw[0] = common->mark_ptr;
2443       kept_shared_count = 1;
2444       setmark_found = TRUE;
2445       }
2446     if (common->control_head_ptr != 0 && !control_head_found)
2447       {
2448       shared_srcw[0] = common->control_head_ptr;
2449       shared_count = 1;
2450       control_head_found = TRUE;
2451       }
2452     cc += 1 + 2 + cc[1];
2453     break;
2454 
2455     case OP_THEN:
2456     SLJIT_ASSERT(common->control_head_ptr != 0);
2457     if (!control_head_found)
2458       {
2459       shared_srcw[0] = common->control_head_ptr;
2460       shared_count = 1;
2461       control_head_found = TRUE;
2462       }
2463     cc++;
2464     break;
2465 
2466     default:
2467     cc = next_opcode(common, cc);
2468     SLJIT_ASSERT(cc != NULL);
2469     break;
2470     }
2471 
2472   if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2473     {
2474     SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2475 
2476     for (i = 0; i < private_count; i++)
2477       {
2478       SLJIT_ASSERT(private_srcw[i] != 0);
2479 
2480       if (!from_sp)
2481         delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);
2482 
2483       if (from_sp || type == recurse_swap_global)
2484         delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);
2485 
2486       stackptr += sizeof(sljit_sw);
2487       }
2488     }
2489   else
2490     stackptr += sizeof(sljit_sw) * private_count;
2491 
2492   if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
2493     {
2494     SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
2495 
2496     for (i = 0; i < shared_count; i++)
2497       {
2498       SLJIT_ASSERT(shared_srcw[i] != 0);
2499 
2500       if (!from_sp)
2501         delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);
2502 
2503       if (from_sp || type == recurse_swap_global)
2504         delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);
2505 
2506       stackptr += sizeof(sljit_sw);
2507       }
2508     }
2509   else
2510     stackptr += sizeof(sljit_sw) * shared_count;
2511 
2512   if (type != recurse_copy_private_to_global && type != recurse_swap_global)
2513     {
2514     SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
2515 
2516     for (i = 0; i < kept_shared_count; i++)
2517       {
2518       SLJIT_ASSERT(kept_shared_srcw[i] != 0);
2519 
2520       if (!from_sp)
2521         delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);
2522 
2523       if (from_sp || type == recurse_swap_global)
2524         delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);
2525 
2526       stackptr += sizeof(sljit_sw);
2527       }
2528     }
2529   else
2530     stackptr += sizeof(sljit_sw) * kept_shared_count;
2531   }
2532 
2533 SLJIT_ASSERT(cc == ccend && stackptr == stacktop);
2534 
2535 delayed_mem_copy_finish(&status);
2536 }
2537 
set_then_offsets(compiler_common * common,PCRE2_SPTR cc,sljit_u8 * current_offset)2538 static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
2539 {
2540 PCRE2_SPTR end = bracketend(cc);
2541 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2542 
2543 /* Assert captures then. */
2544 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2545   current_offset = NULL;
2546 /* Conditional block does not. */
2547 if (*cc == OP_COND || *cc == OP_SCOND)
2548   has_alternatives = FALSE;
2549 
2550 cc = next_opcode(common, cc);
2551 if (has_alternatives)
2552   current_offset = common->then_offsets + (cc - common->start);
2553 
2554 while (cc < end)
2555   {
2556   if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2557     cc = set_then_offsets(common, cc, current_offset);
2558   else
2559     {
2560     if (*cc == OP_ALT && has_alternatives)
2561       current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2562     if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2563       *current_offset = 1;
2564     cc = next_opcode(common, cc);
2565     }
2566   }
2567 
2568 return end;
2569 }
2570 
2571 #undef CASE_ITERATOR_PRIVATE_DATA_1
2572 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2573 #undef CASE_ITERATOR_PRIVATE_DATA_2B
2574 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2575 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2576 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2577 
is_powerof2(unsigned int value)2578 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2579 {
2580 return (value & (value - 1)) == 0;
2581 }
2582 
set_jumps(jump_list * list,struct sljit_label * label)2583 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2584 {
2585 while (list)
2586   {
2587   /* sljit_set_label is clever enough to do nothing
2588   if either the jump or the label is NULL. */
2589   SET_LABEL(list->jump, label);
2590   list = list->next;
2591   }
2592 }
2593 
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)2594 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2595 {
2596 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2597 if (list_item)
2598   {
2599   list_item->next = *list;
2600   list_item->jump = jump;
2601   *list = list_item;
2602   }
2603 }
2604 
/* Records a stub on common->stubs: "start" is the jump which enters the stub
and a label emitted at the current position is where the stub continues
afterwards (see flush_stubs). Nothing is recorded, and no label is emitted,
when the allocation fails. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (stub == NULL)
  return;

stub->start = start;
stub->quit = LABEL();
stub->next = common->stubs;
common->stubs = stub;
}
2618 
/* Emits the code of all pending stubs and empties common->stubs. For each
stub the entry jump is bound here, a fast call is emitted and queued on
common->stackalloc, and control is sent back to the stub's quit label. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
2633 
/* Emits a label at the current position and queues it on common->label_addrs
together with "update_addr" (presumably the location which later receives
the label address - confirm at the consumer). Does nothing when the list
entry cannot be allocated. */
static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
{
DEFINE_COMPILER;
label_addr_list *entry = sljit_alloc_memory(compiler, sizeof(label_addr_list));

if (entry != NULL)
  {
  entry->label = LABEL();
  entry->update_addr = update_addr;
  entry->next = common->label_addrs;
  common->label_addrs = entry;
  }
}
2647 
count_match(compiler_common * common)2648 static SLJIT_INLINE void count_match(compiler_common *common)
2649 {
2650 DEFINE_COMPILER;
2651 
2652 OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2653 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2654 }
2655 
allocate_stack(compiler_common * common,int size)2656 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2657 {
2658 /* May destroy all locals and registers except TMP2. */
2659 DEFINE_COMPILER;
2660 
2661 SLJIT_ASSERT(size > 0);
2662 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2663 #ifdef DESTROY_REGISTERS
2664 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2665 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2666 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2667 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2668 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2669 #endif
2670 add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
2671 }
2672 
free_stack(compiler_common * common,int size)2673 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2674 {
2675 DEFINE_COMPILER;
2676 
2677 SLJIT_ASSERT(size > 0);
2678 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2679 }
2680 
/* Allocates "size" bytes of data with SLJIT_MALLOC. The block is prefixed by
one extra word which links it into the common->read_only_data_head chain.

Returns a pointer to the usable area (just after the link word), or NULL
when the compiler is already in an error state or the allocation fails; in
the latter case the compiler memory error is set. */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *chunk;
sljit_uw *payload = NULL;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

chunk = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
if (SLJIT_UNLIKELY(chunk == NULL))
  sljit_set_compiler_memory_error(compiler);
else
  {
  /* The first word stores the previous head of the chain. */
  *(void**)chunk = common->read_only_data_head;
  common->read_only_data_head = (void *)chunk;
  payload = chunk + 1;
  }

return payload;
}
2700 
reset_ovector(compiler_common * common,int length)2701 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2702 {
2703 DEFINE_COMPILER;
2704 struct sljit_label *loop;
2705 sljit_s32 i;
2706 
2707 /* At this point we can freely use all temporary registers. */
2708 SLJIT_ASSERT(length > 1);
2709 /* TMP1 returns with begin - 1. */
2710 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2711 if (length < 8)
2712   {
2713   for (i = 1; i < length; i++)
2714     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2715   }
2716 else
2717   {
2718   if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
2719     {
2720     GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2721     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2722     loop = LABEL();
2723     sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
2724     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2725     JUMPTO(SLJIT_NOT_ZERO, loop);
2726     }
2727   else
2728     {
2729     GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
2730     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2731     loop = LABEL();
2732     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
2733     OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
2734     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2735     JUMPTO(SLJIT_NOT_ZERO, loop);
2736     }
2737   }
2738 }
2739 
reset_fast_fail(compiler_common * common)2740 static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
2741 {
2742 DEFINE_COMPILER;
2743 sljit_s32 i;
2744 
2745 SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
2746 
2747 OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2748 for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
2749   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
2750 }
2751 
do_reset_match(compiler_common * common,int length)2752 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2753 {
2754 DEFINE_COMPILER;
2755 struct sljit_label *loop;
2756 int i;
2757 
2758 SLJIT_ASSERT(length > 1);
2759 /* OVECTOR(1) contains the "string begin - 1" constant. */
2760 if (length > 2)
2761   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2762 if (length < 8)
2763   {
2764   for (i = 2; i < length; i++)
2765     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2766   }
2767 else
2768   {
2769   if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
2770     {
2771     GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2772     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2773     loop = LABEL();
2774     sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
2775     OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2776     JUMPTO(SLJIT_NOT_ZERO, loop);
2777     }
2778   else
2779     {
2780     GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
2781     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2782     loop = LABEL();
2783     OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
2784     OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
2785     OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2786     JUMPTO(SLJIT_NOT_ZERO, loop);
2787     }
2788   }
2789 
2790 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2791 if (common->mark_ptr != 0)
2792   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2793 if (common->control_head_ptr != 0)
2794   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2795 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2796 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2797 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
2798 }
2799 
do_search_mark(sljit_sw * current,PCRE2_SPTR skip_arg)2800 static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
2801 {
2802 while (current != NULL)
2803   {
2804   switch (current[1])
2805     {
2806     case type_then_trap:
2807     break;
2808 
2809     case type_mark:
2810     if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
2811       return current[3];
2812     break;
2813 
2814     default:
2815     SLJIT_UNREACHABLE();
2816     break;
2817     }
2818   SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
2819   current = (sljit_sw*)current[0];
2820   }
2821 return 0;
2822 }
2823 
copy_ovector(compiler_common * common,int topbracket)2824 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2825 {
2826 DEFINE_COMPILER;
2827 struct sljit_label *loop;
2828 BOOL has_pre;
2829 
2830 /* At this point we can freely use all registers. */
2831 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2832 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2833 
2834 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2835 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2836 if (common->mark_ptr != 0)
2837   OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2838 OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
2839 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
2840 if (common->mark_ptr != 0)
2841   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
2842 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
2843   SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
2844 
2845 has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
2846 
2847 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
2848 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2849 
2850 loop = LABEL();
2851 
2852 if (has_pre)
2853   sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
2854 else
2855   {
2856   OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
2857   OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2858   }
2859 
2860 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
2861 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
2862 /* Copy the integer value to the output buffer */
2863 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
2864 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2865 #endif
2866 
2867 SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
2868 OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
2869 
2870 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2871 JUMPTO(SLJIT_NOT_ZERO, loop);
2872 
2873 /* Calculate the return value, which is the maximum ovector value. */
2874 if (topbracket > 1)
2875   {
2876   if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
2877     {
2878     GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2879     OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2880 
2881     /* OVECTOR(0) is never equal to SLJIT_S2. */
2882     loop = LABEL();
2883     sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2884     OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2885     CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2886     OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2887     }
2888   else
2889     {
2890     GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
2891     OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2892 
2893     /* OVECTOR(0) is never equal to SLJIT_S2. */
2894     loop = LABEL();
2895     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
2896     OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
2897     OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2898     CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2899     OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2900     }
2901   }
2902 else
2903   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2904 }
2905 
return_with_partial_match(compiler_common * common,struct sljit_label * quit)2906 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2907 {
2908 DEFINE_COMPILER;
2909 sljit_s32 mov_opcode;
2910 
2911 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
2912 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2913   && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));
2914 
2915 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2916 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
2917   common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
2918 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);
2919 
2920 /* Store match begin and end. */
2921 OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2922 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
2923 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, match_data));
2924 
2925 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
2926 
2927 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
2928 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
2929 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2930 #endif
2931 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);
2932 
2933 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
2934 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
2935 OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
2936 #endif
2937 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);
2938 
2939 JUMPTO(SLJIT_JUMP, quit);
2940 }
2941 
check_start_used_ptr(compiler_common * common)2942 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2943 {
2944 /* May destroy TMP1. */
2945 DEFINE_COMPILER;
2946 struct sljit_jump *jump;
2947 
2948 if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
2949   {
2950   /* The value of -1 must be kept for start_used_ptr! */
2951   OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2952   /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2953   is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2954   jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2955   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2956   JUMPHERE(jump);
2957   }
2958 else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
2959   {
2960   jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2961   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2962   JUMPHERE(jump);
2963   }
2964 }
2965 
char_has_othercase(compiler_common * common,PCRE2_SPTR cc)2966 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
2967 {
2968 /* Detects if the character has an othercase. */
2969 unsigned int c;
2970 
2971 #ifdef SUPPORT_UNICODE
2972 if (common->utf)
2973   {
2974   GETCHAR(c, cc);
2975   if (c > 127)
2976     {
2977     return c != UCD_OTHERCASE(c);
2978     }
2979 #if PCRE2_CODE_UNIT_WIDTH != 8
2980   return common->fcc[c] != c;
2981 #endif
2982   }
2983 else
2984 #endif
2985   c = *cc;
2986 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2987 }
2988 
char_othercase(compiler_common * common,unsigned int c)2989 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2990 {
2991 /* Returns with the othercase. */
2992 #ifdef SUPPORT_UNICODE
2993 if (common->utf && c > 127)
2994   {
2995   return UCD_OTHERCASE(c);
2996   }
2997 #endif
2998 return TABLE_GET(c, common->fcc, c);
2999 }
3000 
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
/* Checks whether the character at cc and its other case differ in exactly
one bit. Returns 0 when they do not. Otherwise the result is
(code unit index << 8) | (bit mask inside that code unit). The caller must
pass a character that really has a different other case. */
unsigned int chr, other, diff;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int unit_index;
#endif

#ifdef SUPPORT_UNICODE
if (!common->utf)
  {
  chr = *cc;
  other = TABLE_GET(chr, common->fcc, chr);
  }
else
  {
  GETCHAR(chr, cc);
  other = (chr <= 127) ? common->fcc[chr] : UCD_OTHERCASE(chr);
  }
#else
chr = *cc;
other = TABLE_GET(chr, common->fcc, chr);
#endif

SLJIT_ASSERT(chr != other);

diff = chr ^ other;
/* Fast path for the English alphabet. */
if (diff == 0x20 && chr <= 127)
  return (0 << 8) | 0x20;

/* chr != other, so diff has at least one bit set; reject more than one. */
if (!is_powerof2(diff))
  return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
if (common->utf && chr > 127)
  {
  /* Walk from the last code unit towards the first, six payload bits at a
  time, until the unit holding the differing bit is found. */
  unit_index = GET_EXTRALEN(*cc);
  for (; (diff & 0x3f) == 0; diff >>= 6)
    unit_index--;
  return (unit_index << 8) | diff;
  }
#endif /* SUPPORT_UNICODE */
return (0 << 8) | diff;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
if (common->utf && chr > 65535)
  {
  if (diff < (1 << 10))
    return (diff < 256) ? ((2 << 8) | diff) : ((3 << 8) | (diff >> 8));
  diff >>= 10;
  }
#endif /* SUPPORT_UNICODE */
return (diff < 256) ? ((0 << 8) | diff) : ((1 << 8) | (diff >> 8));

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
3072 
static void check_partial(compiler_common *common, BOOL force)
{
/* Emits the check for a partial match. Does not modify registers.
In soft mode the hit_start slot is set to zero; in hard mode control is
transferred to the partial match handler. Unless force is set, the action
is skipped when start_used_ptr >= STR_PTR. With force in soft mode it is
skipped only when start_used_ptr is -1. */
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

if (force)
  {
  if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
    skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  }
else
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel == NULL)
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
else
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);

if (skip != NULL)
  JUMPHERE(skip);
}
3102 
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Emits a subject end test; jumps taken at the end of the subject are
collected in end_reached. Does not affect registers. In the partial modes
the partial match bookkeeping is done as well before leaving. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
/* Both partial modes start with the same start_used_ptr test. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel == NULL)
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
else
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);

JUMPHERE(not_at_end);
}
3132 
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
/* Emits a subject end test whose failure path is added to backtracks.
In the partial modes reaching the end also records (soft) or reports
(hard) the partial match, unless start_used_ptr >= STR_PTR. */
DEFINE_COMPILER;
struct sljit_jump *has_input;

if (common->mode != PCRE2_JIT_COMPLETE)
  {
  /* Partial matching mode. */
  has_input = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

  if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    }
  else if (common->partialmatchlabel == NULL)
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  else
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);

  JUMPHERE(has_input);
  return;
  }

add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
}
3161 
static void peek_char(compiler_common *common, sljit_u32 max)
{
/* Emits code that reads the character at STR_PTR into TMP1 and leaves
STR_PTR unchanged. Does not check STR_END. TMP2 is destroyed.

common  the compiler state
max     when max is below the first value that needs more than one code
        unit (128 for UTF-8, 0xd800 for UTF-16) only the first code unit
        is read and no decoding code is emitted */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif

SLJIT_UNUSED_ARG(max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  /* The utfreadchar helper expects STR_PTR after the first byte, advances
  it to the end of the character and returns the length in TMP2, so
  subtracting TMP2 restores the original STR_PTR. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  /* TMP2 contains the high surrogate. STR_PTR has not been advanced and
  still addresses the high surrogate, so the low surrogate is the next
  code unit. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
3204 
3205 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
3206 
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Decides whether looking at character codes below 128 is enough to
determine a match: bytes 16..31 of the 32 byte bitset (codes 128..255)
must all be 0xff for a negated class, or all zero otherwise. */
const sljit_u8 expected = nclass ? 0xff : 0;
int i;

for (i = 16; i < 32; i++)
  {
  if (bitset[i] != expected)
    return FALSE;
  }
return TRUE;
}
3223 
static void read_char7_type(compiler_common *common, BOOL full_read)
{
/* Emits code that loads the first code unit, advances STR_PTR by one unit
and fetches the ctypes entry of that unit into TMP1. This is the precise
type for characters below 128; for others the result is expected to be
zero. Does not check STR_END. When full_read is set, the remaining code
units of a multi-unit character are skipped as well. UTF mode only. */
DEFINE_COMPILER;
struct sljit_jump *single_unit;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (!full_read)
  return;

/* Lead units >= 0xc0 index utf8_table4 to get the number of extra units. */
single_unit = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
JUMPHERE(single_unit);
}
3247 
3248 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
3249 
static void read_char_range(compiler_common *common, sljit_u32 min, sljit_u32 max, BOOL update_str_ptr)
{
/* Emits code that reads a character into TMP1 and moves STR_PTR forward.
The value in TMP1 is exact only when the character lies in [min, max];
for other characters some value outside that range is produced. This lets
the generated decoder be specialised for the range of interest. Does not
check STR_END. TMP2 (and RETURN_ADDR on some UTF-8 paths) is used as
scratch. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *done;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *outside;
jump_list **reader;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);
SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
SLJIT_ASSERT(min <= max);

/* First code unit; STR_PTR now addresses the second one. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (!update_str_ptr && max < 128) return;

  done = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  if (min >= 0x10000)
    {
    /* Only four byte sequences (lead bytes 0xf0..0xf7) are decoded. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    outside = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(outside);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (min >= 0x800 && max <= 0xffff)
    {
    /* Only three byte sequences (lead bytes 0xe0..0xef) are decoded. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    outside = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(outside);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (max >= 0x800)
    {
    /* General case: call one of the shared decoder routines. */
    reader = (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar;
    add_jump(compiler, reader, JUMP(SLJIT_FAST_CALL));
    }
  else if (max < 128)
    {
    /* The value is not needed, only skip the rest of the character. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  else
    {
    /* Decode as a two byte sequence. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    else
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  JUMPHERE(done);
  }
#endif

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max >= 0x10000)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    done = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
    /* TMP2 contains the high surrogate; STR_PTR addresses the low one. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(done);
    return;
    }

  if (!update_str_ptr && max < 0xd800) return;

  /* Skip low surrogate if necessary. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  done = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  if (update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  /* A surrogate pair is above max here: report an out of range value. */
  if (max >= 0xd800)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
  JUMPHERE(done);
  }
#endif
}
3377 
read_char(compiler_common * common)3378 static SLJIT_INLINE void read_char(compiler_common *common)
3379 {
3380 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
3381 }
3382 
static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
{
/* Emits code that loads the ctypes entry of the next character into TMP1
and updates STR_PTR. Characters above 255 have no entry and produce zero.
Does not check STR_END. TMP2 is used as scratch. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *skip;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *no_type;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The table is read before the lead byte test. The load is wasted for
  multi byte characters, but single byte characters are expected to be
  the frequent case. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  skip = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
  if (update_str_ptr)
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
  else
    {
    /* Inline decode of a two byte sequence; anything above 255 yields 0. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    no_type = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(no_type);
    }
  JUMPHERE(skip);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* ctypes has 256 entries only, so wider values get the type zero. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
skip = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(skip);
#endif

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && update_str_ptr)
  {
  /* After a high surrogate, step over the low surrogate too. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  skip = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(skip);
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
3447 
static void skip_char_back(compiler_common *common)
{
/* Emits code that moves STR_PTR back by one character. STR_PTR and TMP1
are modified. The start of the subject is not checked. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
#if PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *again;

if (common->utf)
  {
  /* Step back one byte at a time while the byte just passed is a
  continuation byte (10xxxxxx). */
  again = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, again);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  /* If the unit just passed is a low surrogate (0xdc00..0xdfff), TMP1
  becomes 1, is doubled to the size of one unit in bytes, and STR_PTR
  moves back once more. Otherwise zero is subtracted. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
/* Fixed width case: one code unit is one character. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
3482 
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Emits a newline test for the character in TMP1. A jump is added to
backtracks when the character is a newline (jumpifmatch set) or when it is
not (jumpifmatch clear). TMP2 may be destroyed. */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The shared anynewline routine reports its result in the zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  /* Single, fixed newline character. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: either CR or NL counts as a newline. */
if (!jumpifmatch)
  {
  is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  JUMPHERE(is_cr);
  return;
  }

add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
}
3515 
3516 #ifdef SUPPORT_UNICODE
3517 
3518 #if PCRE2_CODE_UNIT_WIDTH == 8
static void do_utfreadchar(compiler_common *common)
{
/* Emits the shared UTF-8 decoder routine. On entry TMP1 holds the first
byte of the character (>= 0xc0) and STR_PTR addresses the second byte. On
return TMP1 holds the character value, TMP2 its length in code units, and
STR_PTR has been moved past the character. */
DEFINE_COMPILER;
struct sljit_jump *longer;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Merge the low six bits of the first two bytes. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* The length marker bits of the lead byte are still present in TMP1.
Bit 0x800 is bit 0x20 of the lead byte: clear means two bytes. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
longer = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(longer);
/* Drop the marker bit and append the third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Bit 0x10000 is bit 0x10 of the lead byte: clear means three bytes. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
longer = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Four byte sequence. */
JUMPHERE(longer);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3566 
static void do_utfreadchar16(compiler_common *common)
{
/* Emits the shared UTF-8 decoder routine used when only values that fit
in 16 bits matter to the caller. On entry TMP1 holds the first byte of the
character (>= 0xc0) and STR_PTR addresses the second byte. The value is
returned in TMP1 and STR_PTR is moved forward. */
DEFINE_COMPILER;
struct sljit_jump *longer;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Merge the low six bits of the first two bytes. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Bit 0x800 is bit 0x20 of the lead byte: clear means two bytes. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
longer = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(longer);
/* Bit 0x400 is bit 0x10 of the lead byte. TMP2 becomes 1 when it is set
and STR_PTR is moved by that amount. This routine exists in 8 bit mode
only, so the value needs no scaling to code unit size. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3602 
static void do_utfreadtype8(compiler_common *common)
{
/* Emits the shared routine that fetches the character type of a multi
byte UTF-8 character. On entry TMP2 holds the first byte (>= 0xc0) and
STR_PTR addresses the second byte. The type is returned in TMP1 (zero for
characters above 255) and STR_PTR is moved past the character. */
DEFINE_COMPILER;
struct sljit_jump *three_or_more;
struct sljit_jump *above_255;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
three_or_more = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* TMP2 now holds the upper five payload bits; more than 3 there means
the character is 256 or above. */
above_255 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(above_255);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Longer sequences: types exist only for characters below 256, so the
result is zero and the remaining bytes are skipped using utf8_table4. */
JUMPHERE(three_or_more);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3638 
3639 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
3640 
3641 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3642 #define UCD_BLOCK_MASK 127
3643 #define UCD_BLOCK_SHIFT 7
3644 
static void do_getucd(compiler_common *common)
{
/* Emits the shared routine that looks up the UCD record of the character
in TMP1 through the two stage tables. Returns the chartype in TMP1 and the
UCD record offset in TMP2. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *in_range;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* Sanity check of the dummy record used for invalid characters. */
const ucd_record *record = GET_UCD(INVALID_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Common && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

/* The shift amounts used below depend on these sizes. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Values above the largest code point are mapped to INVALID_UTF_CHAR. */
  in_range = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  JUMPHERE(in_range);
  }
#endif

/* Stage 1: 16 bit entry selected by the block number of the character. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Stage 2: 16 bit entry at (stage 1 value * block size + offset in block). */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* Fetch the chartype field of the selected record. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3686 
3687 #endif /* SUPPORT_UNICODE */
3688 
mainloop_entry(compiler_common * common)3689 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
3690 {
3691 DEFINE_COMPILER;
3692 struct sljit_label *mainloop;
3693 struct sljit_label *newlinelabel = NULL;
3694 struct sljit_jump *start;
3695 struct sljit_jump *end = NULL;
3696 struct sljit_jump *end2 = NULL;
3697 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3698 struct sljit_jump *singlechar;
3699 #endif
3700 jump_list *newline = NULL;
3701 sljit_u32 overall_options = common->re->overall_options;
3702 BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
3703 BOOL newlinecheck = FALSE;
3704 BOOL readuchar = FALSE;
3705 
3706 if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
3707     && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3708   newlinecheck = TRUE;
3709 
3710 SLJIT_ASSERT(common->abort_label == NULL);
3711 
3712 if ((overall_options & PCRE2_FIRSTLINE) != 0)
3713   {
3714   /* Search for the end of the first line. */
3715   SLJIT_ASSERT(common->match_end_ptr != 0);
3716   OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3717 
3718   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3719     {
3720     mainloop = LABEL();
3721     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3722     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3723     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3724     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3725     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3726     CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3727     JUMPHERE(end);
3728     OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3729     }
3730   else
3731     {
3732     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3733     mainloop = LABEL();
3734     /* Continual stores does not cause data dependency. */
3735     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
3736     read_char_range(common, common->nlmin, common->nlmax, TRUE);
3737     check_newlinechar(common, common->nltype, &newline, TRUE);
3738     CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3739     JUMPHERE(end);
3740     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
3741     set_jumps(newline, LABEL());
3742     }
3743 
3744   OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3745   }
3746 else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
3747   {
3748   /* Check whether offset limit is set and valid. */
3749   SLJIT_ASSERT(common->match_end_ptr != 0);
3750 
3751   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3752   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
3753   OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
3754   end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
3755   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3756 #if PCRE2_CODE_UNIT_WIDTH == 16
3757   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3758 #elif PCRE2_CODE_UNIT_WIDTH == 32
3759   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
3760 #endif
3761   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3762   OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
3763   end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
3764   OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
3765   JUMPHERE(end2);
3766   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
3767   add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
3768   JUMPHERE(end);
3769   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
3770   }
3771 
3772 start = JUMP(SLJIT_JUMP);
3773 
3774 if (newlinecheck)
3775   {
3776   newlinelabel = LABEL();
3777   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3778   end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3779   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3780   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3781   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
3782 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3783   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3784 #endif
3785   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3786   end2 = JUMP(SLJIT_JUMP);
3787   }
3788 
3789 mainloop = LABEL();
3790 
3791 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3792 #ifdef SUPPORT_UNICODE
3793 if (common->utf) readuchar = TRUE;
3794 #endif
3795 if (newlinecheck) readuchar = TRUE;
3796 
3797 if (readuchar)
3798   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3799 
3800 if (newlinecheck)
3801   CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3802 
3803 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3804 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3805 #if PCRE2_CODE_UNIT_WIDTH == 8
3806 if (common->utf)
3807   {
3808   singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3809   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3810   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3811   JUMPHERE(singlechar);
3812   }
3813 #elif PCRE2_CODE_UNIT_WIDTH == 16
3814 if (common->utf)
3815   {
3816   singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3817   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3818   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3819   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
3820   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3821   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3822   JUMPHERE(singlechar);
3823   }
3824 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
3825 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
3826 JUMPHERE(start);
3827 
3828 if (newlinecheck)
3829   {
3830   JUMPHERE(end);
3831   JUMPHERE(end2);
3832   }
3833 
3834 return mainloop;
3835 }
3836 
3837 
add_prefix_char(PCRE2_UCHAR chr,fast_forward_char_data * chars,BOOL last)3838 static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
3839 {
3840 sljit_u32 i, count = chars->count;
3841 
3842 if (count == 255)
3843   return;
3844 
3845 if (count == 0)
3846   {
3847   chars->count = 1;
3848   chars->chars[0] = chr;
3849 
3850   if (last)
3851     chars->last_count = 1;
3852   return;
3853   }
3854 
3855 for (i = 0; i < count; i++)
3856   if (chars->chars[i] == chr)
3857     return;
3858 
3859 if (count >= MAX_DIFF_CHARS)
3860   {
3861   chars->count = 255;
3862   return;
3863   }
3864 
3865 chars->chars[count] = chr;
3866 chars->count = count + 1;
3867 
3868 if (last)
3869   chars->last_count++;
3870 }
3871 
/* Recursively walks the compiled pattern starting at cc and records, for up to
max_chars consecutive subject positions, which code units may appear there.

  common     JIT compiler state (utf flag, character tables, ...)
  cc         current position in the compiled pattern
  chars      array of per-position sets; chars[0] is the first position
             handled by this invocation
  max_chars  number of positions that may still be filled
  rec_count  shared work budget; decremented once per loop iteration

Returns the number of positions advanced on the main path of this invocation.
Returns 0 (regardless of what was consumed) when the rec_count budget runs out.

Optional items and alternatives are handled by recursing into the other path
with the same chars pointer, so both paths merge into the same sets. The value
returned by such a recursion replaces max_chars, which limits the main path to
the length reached on the other path; a result of 0 stops the scan.

Positions that cannot be described by a small set of values are marked with
count 255 (the same saturation marker add_prefix_char uses). */
static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
{
/* Recursive function, which scans prefix literals. */
BOOL last, any, class, caseless;
int len, repeat, len_save, consumed = 0;
sljit_u32 chr; /* Any unicode character. */
/* The class bitmap is only read, so keep the pattern's constness. */
const sljit_u8 *bytes, *bytes_end;
sljit_u8 byte;
PCRE2_SPTR alternative, cc_save, oc;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
PCRE2_UCHAR othercase[4];
#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
PCRE2_UCHAR othercase[2];
#else
PCRE2_UCHAR othercase[1];
#endif

repeat = 1;
while (TRUE)
  {
  /* Give up once the shared work budget is exhausted. */
  if (*rec_count == 0)
    return 0;
  (*rec_count)--;

  /* Per-item flags:
     last     - stop scanning after this item
     any      - the item is marked as a saturated position (count 255)
     class    - the item is a 256 bit class bitmap
     caseless - the literal also matches its other case */
  last = TRUE;
  any = FALSE;
  class = FALSE;
  caseless = FALSE;

  switch (*cc)
    {
    case OP_CHARI:
    caseless = TRUE;
    /* Fall through */
    case OP_CHAR:
    last = FALSE;
    cc++;
    break;

    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    /* Zero width assertions. */
    cc++;
    continue;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    /* Assertion groups are skipped as a whole. */
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    caseless = TRUE;
    /* Fall through */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    /* One occurrence is recorded; last stays TRUE so the scan ends here. */
    cc++;
    break;

    case OP_EXACTI:
    caseless = TRUE;
    /* Fall through */
    case OP_EXACT:
    repeat = GET2(cc, 1);
    last = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    caseless = TRUE;
    /* Fall through */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    len = 1;
    cc++;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
    /* Scan the path where the optional character is absent first. */
    max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
    if (max_chars == 0)
      return consumed;
    last = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    /* End of the current alternative: follow the link stored in it. */
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* Scan every further alternative recursively, then continue with the
    first alternative on the main path. */
    alternative = cc + GET(cc, 1);
    while (*alternative == OP_ALT)
      {
      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      alternative += GET(alternative, 1);
      }

    /* Capturing brackets carry an extra IMM2_SIZE field. */
    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
      return consumed;
#endif
    class = TRUE;
    break;

    case OP_NCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    class = TRUE;
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    /* Skip the opcode here; the shared code below skips the character. */
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UNICODE
    case OP_NOTPROP:
    case OP_PROP:
#if PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* Only remember the count; the type opcode that follows is handled by
    the next iteration, which consumes repeat. */
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    /* Anything else ends the prefix. */
    return consumed;
    }

  if (any)
    {
    /* Saturate one position per repetition. */
    do
      {
      chars->count = 255;

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    repeat = 1;
    continue;
    }

  if (class)
    {
    /* The 32 byte bitmap follows the opcode; the optional repeat opcode
    follows the bitmap. */
    bytes = (const sljit_u8 *)(cc + 1);
    cc += 1 + 32 / sizeof(PCRE2_UCHAR);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      /* The class may match nothing: scan what follows it as well. */
      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      break;

      default:
      case OP_CRPLUS:
      case OP_CRMINPLUS:
      case OP_CRPOSPLUS:
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* The minimum count gives the number of guaranteed positions. */
      repeat = GET2(cc, 1);
      if (repeat <= 0)
        return consumed;
      break;
      }

    do
      {
      /* A bitmap with bit 255 set saturates the position. */
      if (bytes[31] & 0x80)
        chars->count = 255;
      else if (chars->count != 255)
        {
        /* Add every value whose bit is set, eight values per bitmap byte. */
        bytes_end = bytes + 32;
        chr = 0;
        do
          {
          byte = *bytes++;
          SLJIT_ASSERT((chr & 0x7) == 0);
          if (byte == 0)
            chr += 8;
          else
            {
            do
              {
              if ((byte & 0x1) != 0)
                add_prefix_char(chr, chars, TRUE);
              byte >>= 1;
              chr++;
              }
            while (byte != 0);
            /* Round up to the first value of the next bitmap byte. */
            chr = (chr + 7) & ~7;
            }
          }
        while (chars->count != 255 && bytes < bytes_end);
        /* Rewind to the start of the bitmap for the next repetition. */
        bytes = bytes_end - 32;
        }

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      return consumed;

      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      cc++;
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* Continue only when the repeat count is fixed (min == max). */
      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
        return consumed;
      cc += 1 + 2 * IMM2_SIZE;
      break;
      }

    repeat = 1;
    continue;
    }

  /* Literal character: len is its length in code units. */
  len = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif

  if (caseless && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UNICODE
    if (common->utf)
      {
      GETCHAR(chr, cc);
      /* Both cases must encode to the same number of code units. */
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
        return consumed;
      }
    else
#endif
      {
      chr = *cc;
      othercase[0] = TABLE_GET(chr, common->fcc, chr);
      }
    }
  else
    {
    caseless = FALSE;
    othercase[0] = 0; /* Stops compiler warning - PH */
    }

  /* Record the character repeat times, one position per code unit. */
  len_save = len;
  cc_save = cc;
  while (TRUE)
    {
    oc = othercase;
    do
      {
      len--;
      consumed++;

      /* len == 0 marks the final code unit of the character. */
      add_prefix_char(*cc, chars, len == 0);

      if (caseless)
        add_prefix_char(*oc, chars, len == 0);

      if (--max_chars == 0)
        return consumed;
      chars++;
      cc++;
      oc++;
      }
    while (len > 0);

    if (--repeat == 0)
      break;

    /* Rewind to the start of the character for the next repetition. */
    len = len_save;
    cc = cc_save;
    }

  repeat = 1;
  if (last)
    return consumed;
  }
}
4273 
4274 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
jumpto_if_not_utf_char_start(struct sljit_compiler * compiler,sljit_s32 reg,struct sljit_label * label)4275 static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
4276 {
4277 #if PCRE2_CODE_UNIT_WIDTH == 8
4278 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
4279 CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0x80, label);
4280 #elif PCRE2_CODE_UNIT_WIDTH == 16
4281 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
4282 CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label);
4283 #else
4284 #error "Unknown code width"
4285 #endif
4286 }
4287 #endif
4288 
4289 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
4290 
4291 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
jump_if_utf_char_start(struct sljit_compiler * compiler,sljit_s32 reg)4292 static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg)
4293 {
4294 #if PCRE2_CODE_UNIT_WIDTH == 8
4295 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
4296 return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0x80);
4297 #elif PCRE2_CODE_UNIT_WIDTH == 16
4298 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
4299 return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00);
4300 #else
4301 #error "Unknown code width"
4302 #endif
4303 }
4304 #endif
4305 
character_to_int32(PCRE2_UCHAR chr)4306 static sljit_s32 character_to_int32(PCRE2_UCHAR chr)
4307 {
4308 sljit_s32 value = (sljit_s32)chr;
4309 #if PCRE2_CODE_UNIT_WIDTH == 8
4310 #define SSE2_COMPARE_TYPE_INDEX 0
4311 return (value << 24) | (value << 16) | (value << 8) | value;
4312 #elif PCRE2_CODE_UNIT_WIDTH == 16
4313 #define SSE2_COMPARE_TYPE_INDEX 1
4314 return (value << 16) | value;
4315 #elif PCRE2_CODE_UNIT_WIDTH == 32
4316 #define SSE2_COMPARE_TYPE_INDEX 2
4317 return value;
4318 #else
4319 #error "Unsupported unit width"
4320 #endif
4321 }
4322 
/* Emits "MOVDQA xmm, [reg]": loads 16 bytes from the address held in a
general purpose register into an XMM register.

  compiler         the sljit compiler that receives the instruction
  dst_xmm_reg      destination XMM register index, must be below 8
  src_general_reg  machine index of the register that holds the address

On x86-64, register indices 8..15 need a REX.B prefix, which makes the
instruction five bytes long; all five bytes must be emitted.

NOTE(review): the ModRM byte uses mod == 00 without SIB or displacement, so a
source register whose low three bits are 4 or 5 would not encode a plain
[reg] operand - callers presumably never pass such a register; confirm. */
static void load_from_mem_sse2(struct sljit_compiler *compiler, sljit_s32 dst_xmm_reg, sljit_s32 src_general_reg)
{
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
sljit_u8 instruction[5];
#else
sljit_u8 instruction[4];
#endif

SLJIT_ASSERT(dst_xmm_reg < 8);

/* MOVDQA xmm1, xmm2/m128 */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if (src_general_reg < 8)
  {
  instruction[0] = 0x66;
  instruction[1] = 0x0f;
  instruction[2] = 0x6f;
  instruction[3] = (dst_xmm_reg << 3) | src_general_reg;
  sljit_emit_op_custom(compiler, instruction, 4);
  }
else
  {
  /* 0x41 is the REX.B prefix; it sits between 0x66 and the opcode. */
  instruction[0] = 0x66;
  instruction[1] = 0x41;
  instruction[2] = 0x0f;
  instruction[3] = 0x6f;
  instruction[4] = (dst_xmm_reg << 3) | (src_general_reg & 0x7);
  /* Five bytes: emitting only four would drop the ModRM byte. */
  sljit_emit_op_custom(compiler, instruction, 5);
  }
#else
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = 0x6f;
instruction[3] = (dst_xmm_reg << 3) | src_general_reg;
sljit_emit_op_custom(compiler, instruction, 4);
#endif
}
4360 
/* Emits the SSE2 instructions that turn the 16 data bytes in XMM register
dst_ind into a per-element match mask for char1 / char2.

  char1, char2  the two accepted code units (may be equal)
  bit           non-zero when the caller selected the single-bit variant
  dst_ind       XMM register holding the data; receives the mask
  cmp1_ind      XMM register compared against dst_ind
  cmp2_ind      XMM register ORed into the data (bit != 0) or used for the
                second compare (general case)
  tmp_ind       scratch XMM register, used only in the general case

Three shapes are generated:
  - char1 == char2:          PCMPEQ dst, cmp1
  - bit != 0:                POR dst, cmp2 ; PCMPEQ dst, cmp1
  - otherwise:               MOVDQA tmp, dst ; PCMPEQ dst, cmp1 ;
                             PCMPEQ tmp, cmp2 ; POR dst, tmp
The PCMPEQ element size is selected by SSE2_COMPARE_TYPE_INDEX. */
static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, PCRE2_UCHAR char1, PCRE2_UCHAR char2,
  sljit_u32 bit, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
{
sljit_u8 insn[4];

/* Every instruction emitted here starts with 0x66 0x0f. */
insn[0] = 0x66;
insn[1] = 0x0f;

if (char1 != char2 && bit == 0)
  {
  /* General case: two independent compares, merged at the end. */

  /* MOVDQA xmm1, xmm2/m128 */
  insn[2] = 0x6f;
  insn[3] = 0xc0 | (tmp_ind << 3) | dst_ind;
  sljit_emit_op_custom(compiler, insn, 4);

  /* PCMPEQB/W/D xmm1, xmm2/m128 */
  insn[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
  insn[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
  sljit_emit_op_custom(compiler, insn, 4);

  /* Same opcode, now the copy against the second value. */
  insn[3] = 0xc0 | (tmp_ind << 3) | cmp2_ind;
  sljit_emit_op_custom(compiler, insn, 4);

  /* POR xmm1, xmm2/m128 */
  insn[2] = 0xeb;
  insn[3] = 0xc0 | (dst_ind << 3) | tmp_ind;
  sljit_emit_op_custom(compiler, insn, 4);
  return;
  }

if (bit != 0)
  {
  /* POR xmm1, xmm2/m128 */
  insn[2] = 0xeb;
  insn[3] = 0xc0 | (dst_ind << 3) | cmp2_ind;
  sljit_emit_op_custom(compiler, insn, 4);
  }

/* PCMPEQB/W/D xmm1, xmm2/m128 */
insn[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
insn[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
sljit_emit_op_custom(compiler, insn, 4);
}
4414 
/* Emits an SSE2 based forward search that advances STR_PTR to the next code
unit equal to char1 or char2.

  common  JIT compiler state
  char1   first accepted code unit
  char2   second accepted code unit (equal to char1 for a single value)
  offset  only read in UTF-8/16 builds: when positive, the code unit at
          STR_PTR - offset must start a character, otherwise the search
          is restarted one code unit further

Generated code outline:
  1. Broadcast the comparison value(s) into XMM registers.
  2. Round STR_PTR down to a 16 byte boundary, compare that block and
     discard the match bits that lie before the original STR_PTR.
  3. If nothing matched, loop over the following aligned 16 byte blocks.
  4. Add the index of the first match bit to STR_PTR.
In PCRE2_JIT_COMPLETE mode reaching STR_END jumps to common->failed_match;
in the other modes STR_PTR is clamped to STR_END instead.

When char1 and char2 differ in exactly one bit, the data is ORed with that
bit and compared against char1 | bit, so a single compare covers both.

TMP1 and TMP2 are clobbered by the emitted code. */
static void fast_forward_first_char2_sse2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *start;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *quit;
struct sljit_jump *partial_quit[2];
sljit_u8 instruction[8];
sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
/* XMM register assignment. Every encoding below refers to these names
rather than to literal register numbers. */
sljit_s32 data_ind = 0;
sljit_s32 tmp_ind = 1;
sljit_s32 cmp1_ind = 2;
sljit_s32 cmp2_ind = 3;
sljit_u32 bit = 0;

/* offset is only read by the UTF-8/16 code at the end. */
SLJIT_UNUSED_ARG(offset);

if (char1 != char2)
  {
  /* Use the single-bit variant only when exactly one bit differs. */
  bit = char1 ^ char2;
  if (!is_powerof2(bit))
    bit = 0;
  }

partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit[0]);

/* First part (unaligned start) */

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));

/* The encodings below have no REX prefix, so TMP1 must be a low register. */
SLJIT_ASSERT(tmp1_ind < 8);

/* MOVD xmm, r/m32 */
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = 0x6e;
instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 4);

if (char1 != char2)
  {
  /* Second register: the differing bit, or char2 in the general case. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));

  /* MOVD xmm, r/m32 */
  instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_ind;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

/* TMP2 keeps the unaligned start position. */
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);

/* PSHUFD xmm1, xmm2/m128, imm8 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
/* imm8 == 0 copies the lowest dword into all four dwords. */
instruction[2] = 0x70;
instruction[3] = 0xc0 | (cmp1_ind << 3) | cmp1_ind;
instruction[4] = 0;
sljit_emit_op_custom(compiler, instruction, 5);

if (char1 != char2)
  {
  /* PSHUFD xmm1, xmm2/m128, imm8 */
  instruction[3] = 0xc0 | (cmp2_ind << 3) | cmp2_ind;
  sljit_emit_op_custom(compiler, instruction, 5);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
restart = LABEL();
#endif
/* STR_PTR: start of the enclosing 16 byte block; TMP2: offset inside it. */
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);

load_from_mem_sse2(compiler, data_ind, str_ptr_ind);
fast_forward_char_pair_sse2_compare(compiler, char1, char2, bit, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

/* PMOVMSKB reg, xmm */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_ind << 3) | data_ind;
sljit_emit_op_custom(compiler, instruction, 4);

/* Restore the unaligned STR_PTR and drop the match bits that precede it. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);

/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 3);
/* BSF sets the zero flag when its source is zero (no match bit). */
sljit_set_current_flags(compiler, SLJIT_SET_Z);

quit = JUMP(SLJIT_NOT_ZERO);

/* No match in the first block: go back to the aligned address. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

start = LABEL();
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);

partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit[1]);

/* Second part (aligned) */

load_from_mem_sse2(compiler, data_ind, str_ptr_ind);
fast_forward_char_pair_sse2_compare(compiler, char1, char2, bit, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

/* PMOVMSKB reg, xmm */
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_ind << 3) | data_ind;
sljit_emit_op_custom(compiler, instruction, 4);

/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 3);
sljit_set_current_flags(compiler, SLJIT_SET_Z);

JUMPTO(SLJIT_ZERO, start);

/* TMP1 holds the index of the first match bit on both paths. */
JUMPHERE(quit);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

if (common->mode != PCRE2_JIT_COMPLETE)
  {
  /* Partial modes: running off the end lands here; clamp to STR_END. */
  JUMPHERE(partial_quit[0]);
  JUMPHERE(partial_quit[1]);
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
  CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
  }
else
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);

  /* Inspect the code unit offset positions before the match. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));

  quit = jump_if_utf_char_start(compiler, TMP1);

  /* Not a character start: skip one code unit and search again. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, restart);

  JUMPHERE(quit);
  }
#endif
}
4574 
4575 #ifndef _WIN64
4576 
max_fast_forward_char_pair_sse2_offset(void)4577 static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_sse2_offset(void)
4578 {
4579 #if PCRE2_CODE_UNIT_WIDTH == 8
4580 return 15;
4581 #elif PCRE2_CODE_UNIT_WIDTH == 16
4582 return 7;
4583 #elif PCRE2_CODE_UNIT_WIDTH == 32
4584 return 3;
4585 #else
4586 #error "Unsupported unit width"
4587 #endif
4588 }
4589 
fast_forward_char_pair_sse2(compiler_common * common,sljit_s32 offs1,PCRE2_UCHAR char1a,PCRE2_UCHAR char1b,sljit_s32 offs2,PCRE2_UCHAR char2a,PCRE2_UCHAR char2b)4590 static void fast_forward_char_pair_sse2(compiler_common *common, sljit_s32 offs1,
4591   PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
4592 {
4593 DEFINE_COMPILER;
4594 sljit_u32 bit1 = 0;
4595 sljit_u32 bit2 = 0;
4596 sljit_u32 diff = IN_UCHARS(offs1 - offs2);
4597 sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
4598 sljit_s32 tmp2_ind = sljit_get_register_index(TMP2);
4599 sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
4600 sljit_s32 data1_ind = 0;
4601 sljit_s32 data2_ind = 1;
4602 sljit_s32 tmp_ind = 2;
4603 sljit_s32 cmp1a_ind = 3;
4604 sljit_s32 cmp1b_ind = 4;
4605 sljit_s32 cmp2a_ind = 5;
4606 sljit_s32 cmp2b_ind = 6;
4607 struct sljit_label *start;
4608 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4609 struct sljit_label *restart;
4610 #endif
4611 struct sljit_jump *jump[2];
4612 
4613 sljit_u8 instruction[8];
4614 
4615 SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
4616 SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_sse2_offset()));
4617 SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);
4618 
4619 /* Initialize. */
4620 if (common->match_end_ptr != 0)
4621   {
4622   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4623   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4624   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
4625 
4626   OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
4627   CMOV(SLJIT_LESS, STR_END, TMP1, 0);
4628   }
4629 
4630 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
4631 add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4632 
4633 /* MOVD xmm, r/m32 */
4634 instruction[0] = 0x66;
4635 instruction[1] = 0x0f;
4636 instruction[2] = 0x6e;
4637 
4638 if (char1a == char1b)
4639   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
4640 else
4641   {
4642   bit1 = char1a ^ char1b;
4643   if (is_powerof2(bit1))
4644     {
4645     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1));
4646     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1));
4647     }
4648   else
4649     {
4650     bit1 = 0;
4651     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
4652     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b));
4653     }
4654   }
4655 
4656 instruction[3] = 0xc0 | (cmp1a_ind << 3) | tmp1_ind;
4657 sljit_emit_op_custom(compiler, instruction, 4);
4658 
4659 if (char1a != char1b)
4660   {
4661   instruction[3] = 0xc0 | (cmp1b_ind << 3) | tmp2_ind;
4662   sljit_emit_op_custom(compiler, instruction, 4);
4663   }
4664 
4665 if (char2a == char2b)
4666   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
4667 else
4668   {
4669   bit2 = char2a ^ char2b;
4670   if (is_powerof2(bit2))
4671     {
4672     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2));
4673     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2));
4674     }
4675   else
4676     {
4677     bit2 = 0;
4678     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
4679     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b));
4680     }
4681   }
4682 
4683 instruction[3] = 0xc0 | (cmp2a_ind << 3) | tmp1_ind;
4684 sljit_emit_op_custom(compiler, instruction, 4);
4685 
4686 if (char2a != char2b)
4687   {
4688   instruction[3] = 0xc0 | (cmp2b_ind << 3) | tmp2_ind;
4689   sljit_emit_op_custom(compiler, instruction, 4);
4690   }
4691 
4692 /* PSHUFD xmm1, xmm2/m128, imm8 */
4693 /* instruction[0] = 0x66; */
4694 /* instruction[1] = 0x0f; */
4695 instruction[2] = 0x70;
4696 instruction[4] = 0;
4697 
4698 instruction[3] = 0xc0 | (cmp1a_ind << 3) | cmp1a_ind;
4699 sljit_emit_op_custom(compiler, instruction, 5);
4700 
4701 if (char1a != char1b)
4702   {
4703   instruction[3] = 0xc0 | (cmp1b_ind << 3) | cmp1b_ind;
4704   sljit_emit_op_custom(compiler, instruction, 5);
4705   }
4706 
4707 instruction[3] = 0xc0 | (cmp2a_ind << 3) | cmp2a_ind;
4708 sljit_emit_op_custom(compiler, instruction, 5);
4709 
4710 if (char2a != char2b)
4711   {
4712   instruction[3] = 0xc0 | (cmp2b_ind << 3) | cmp2b_ind;
4713   sljit_emit_op_custom(compiler, instruction, 5);
4714   }
4715 
4716 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4717 restart = LABEL();
4718 #endif
4719 
4720 OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1 - offs2));
4721 OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
4722 OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
4723 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, ~0xf);
4724 
4725 load_from_mem_sse2(compiler, data1_ind, str_ptr_ind);
4726 
4727 jump[0] = CMP(SLJIT_EQUAL, STR_PTR, 0, TMP1, 0);
4728 
4729 load_from_mem_sse2(compiler, data2_ind, tmp1_ind);
4730 
4731 /* MOVDQA xmm1, xmm2/m128 */
4732 /* instruction[0] = 0x66; */
4733 /* instruction[1] = 0x0f; */
4734 instruction[2] = 0x6f;
4735 instruction[3] = 0xc0 | (tmp_ind << 3) | data1_ind;
4736 sljit_emit_op_custom(compiler, instruction, 4);
4737 
4738 /* PSLLDQ xmm1, xmm2/m128, imm8 */
4739 /* instruction[0] = 0x66; */
4740 /* instruction[1] = 0x0f; */
4741 instruction[2] = 0x73;
4742 instruction[3] = 0xc0 | (7 << 3) | tmp_ind;
4743 instruction[4] = diff;
4744 sljit_emit_op_custom(compiler, instruction, 5);
4745 
4746 /* PSRLDQ xmm1, xmm2/m128, imm8 */
4747 /* instruction[0] = 0x66; */
4748 /* instruction[1] = 0x0f; */
4749 /* instruction[2] = 0x73; */
4750 instruction[3] = 0xc0 | (3 << 3) | data2_ind;
4751 instruction[4] = 16 - diff;
4752 sljit_emit_op_custom(compiler, instruction, 5);
4753 
4754 /* POR xmm1, xmm2/m128 */
4755 /* instruction[0] = 0x66; */
4756 /* instruction[1] = 0x0f; */
4757 instruction[2] = 0xeb;
4758 instruction[3] = 0xc0 | (data2_ind << 3) | tmp_ind;
4759 sljit_emit_op_custom(compiler, instruction, 4);
4760 
4761 jump[1] = JUMP(SLJIT_JUMP);
4762 
4763 JUMPHERE(jump[0]);
4764 
4765 /* MOVDQA xmm1, xmm2/m128 */
4766 /* instruction[0] = 0x66; */
4767 /* instruction[1] = 0x0f; */
4768 instruction[2] = 0x6f;
4769 instruction[3] = 0xc0 | (data2_ind << 3) | data1_ind;
4770 sljit_emit_op_custom(compiler, instruction, 4);
4771 
4772 /* PSLLDQ xmm1, xmm2/m128, imm8 */
4773 /* instruction[0] = 0x66; */
4774 /* instruction[1] = 0x0f; */
4775 instruction[2] = 0x73;
4776 instruction[3] = 0xc0 | (7 << 3) | data2_ind;
4777 instruction[4] = diff;
4778 sljit_emit_op_custom(compiler, instruction, 5);
4779 
4780 JUMPHERE(jump[1]);
4781 
4782 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
4783 
4784 fast_forward_char_pair_sse2_compare(compiler, char2a, char2b, bit2, data2_ind, cmp2a_ind, cmp2b_ind, tmp_ind);
4785 fast_forward_char_pair_sse2_compare(compiler, char1a, char1b, bit1, data1_ind, cmp1a_ind, cmp1b_ind, tmp_ind);
4786 
4787 /* PAND xmm1, xmm2/m128 */
4788 /* instruction[0] = 0x66; */
4789 /* instruction[1] = 0x0f; */
4790 instruction[2] = 0xdb;
4791 instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind;
4792 sljit_emit_op_custom(compiler, instruction, 4);
4793 
4794 /* PMOVMSKB reg, xmm */
4795 /* instruction[0] = 0x66; */
4796 /* instruction[1] = 0x0f; */
4797 instruction[2] = 0xd7;
4798 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
4799 sljit_emit_op_custom(compiler, instruction, 4);
4800 
4801 /* Ignore matches before the first STR_PTR. */
4802 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4803 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
4804 
4805 /* BSF r32, r/m32 */
4806 instruction[0] = 0x0f;
4807 instruction[1] = 0xbc;
4808 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
4809 sljit_emit_op_custom(compiler, instruction, 3);
4810 sljit_set_current_flags(compiler, SLJIT_SET_Z);
4811 
4812 jump[0] = JUMP(SLJIT_NOT_ZERO);
4813 
4814 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4815 
4816 /* Main loop. */
4817 instruction[0] = 0x66;
4818 instruction[1] = 0x0f;
4819 
4820 start = LABEL();
4821 
4822 load_from_mem_sse2(compiler, data2_ind, str_ptr_ind);
4823 
4824 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
4825 add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4826 
4827 load_from_mem_sse2(compiler, data1_ind, str_ptr_ind);
4828 
4829 /* PSRLDQ xmm1, xmm2/m128, imm8 */
4830 /* instruction[0] = 0x66; */
4831 /* instruction[1] = 0x0f; */
4832 instruction[2] = 0x73;
4833 instruction[3] = 0xc0 | (3 << 3) | data2_ind;
4834 instruction[4] = 16 - diff;
4835 sljit_emit_op_custom(compiler, instruction, 5);
4836 
4837 /* MOVDQA xmm1, xmm2/m128 */
4838 /* instruction[0] = 0x66; */
4839 /* instruction[1] = 0x0f; */
4840 instruction[2] = 0x6f;
4841 instruction[3] = 0xc0 | (tmp_ind << 3) | data1_ind;
4842 sljit_emit_op_custom(compiler, instruction, 4);
4843 
4844 /* PSLLDQ xmm1, xmm2/m128, imm8 */
4845 /* instruction[0] = 0x66; */
4846 /* instruction[1] = 0x0f; */
4847 instruction[2] = 0x73;
4848 instruction[3] = 0xc0 | (7 << 3) | tmp_ind;
4849 instruction[4] = diff;
4850 sljit_emit_op_custom(compiler, instruction, 5);
4851 
4852 /* POR xmm1, xmm2/m128 */
4853 /* instruction[0] = 0x66; */
4854 /* instruction[1] = 0x0f; */
4855 instruction[2] = 0xeb;
4856 instruction[3] = 0xc0 | (data2_ind << 3) | tmp_ind;
4857 sljit_emit_op_custom(compiler, instruction, 4);
4858 
4859 fast_forward_char_pair_sse2_compare(compiler, char1a, char1b, bit1, data1_ind, cmp1a_ind, cmp1b_ind, tmp_ind);
4860 fast_forward_char_pair_sse2_compare(compiler, char2a, char2b, bit2, data2_ind, cmp2a_ind, cmp2b_ind, tmp_ind);
4861 
4862 /* PAND xmm1, xmm2/m128 */
4863 /* instruction[0] = 0x66; */
4864 /* instruction[1] = 0x0f; */
4865 instruction[2] = 0xdb;
4866 instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind;
4867 sljit_emit_op_custom(compiler, instruction, 4);
4868 
4869 /* PMOVMSKB reg, xmm */
4870 /* instruction[0] = 0x66; */
4871 /* instruction[1] = 0x0f; */
4872 instruction[2] = 0xd7;
4873 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
4874 sljit_emit_op_custom(compiler, instruction, 4);
4875 
4876 /* BSF r32, r/m32 */
4877 instruction[0] = 0x0f;
4878 instruction[1] = 0xbc;
4879 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
4880 sljit_emit_op_custom(compiler, instruction, 3);
4881 sljit_set_current_flags(compiler, SLJIT_SET_Z);
4882 
4883 JUMPTO(SLJIT_ZERO, start);
4884 
4885 JUMPHERE(jump[0]);
4886 
4887 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4888 
4889 add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4890 
4891 if (common->match_end_ptr != 0)
4892   OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4893 
4894 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4895 if (common->utf)
4896   {
4897   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));
4898 
4899   jump[0] = jump_if_utf_char_start(compiler, TMP1);
4900 
4901   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4902   CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart);
4903 
4904   add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP));
4905 
4906   JUMPHERE(jump[0]);
4907   }
4908 #endif
4909 
4910 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
4911 
4912 if (common->match_end_ptr != 0)
4913   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4914 }
4915 
check_fast_forward_char_pair_sse2(compiler_common * common,fast_forward_char_data * chars,int max)4916 static BOOL check_fast_forward_char_pair_sse2(compiler_common *common, fast_forward_char_data *chars, int max)
4917 {
4918 sljit_s32 i, j, priority, count;
4919 sljit_u32 priorities;
4920 PCRE2_UCHAR a1, a2, b1, b2;
4921 
4922 priorities = 0;
4923 
4924 count = 0;
4925 for (i = 0; i < max; i++)
4926   {
4927   if (chars[i].last_count > 2)
4928     {
4929     SLJIT_ASSERT(chars[i].last_count <= 7);
4930 
4931     priorities |= (1 << chars[i].last_count);
4932     count++;
4933     }
4934   }
4935 
4936 if (count < 2)
4937   return FALSE;
4938 
4939 for (priority = 7; priority > 2; priority--)
4940   {
4941   if ((priorities & (1 << priority)) == 0)
4942     continue;
4943 
4944   for (i = max - 1; i >= 1; i--)
4945     if (chars[i].last_count >= priority)
4946       {
4947       SLJIT_ASSERT(chars[i].count <= 2 && chars[i].count >= 1);
4948 
4949       a1 = chars[i].chars[0];
4950       a2 = chars[i].chars[1];
4951 
4952       j = i - max_fast_forward_char_pair_sse2_offset();
4953       if (j < 0)
4954         j = 0;
4955 
4956       while (j < i)
4957         {
4958         if (chars[j].last_count >= priority)
4959           {
4960           b1 = chars[j].chars[0];
4961           b2 = chars[j].chars[1];
4962 
4963           if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)
4964             {
4965             fast_forward_char_pair_sse2(common, i, a1, a2, j, b1, b2);
4966             return TRUE;
4967             }
4968           }
4969         j++;
4970         }
4971       }
4972   }
4973 
4974 return FALSE;
4975 }
4976 
4977 #endif
4978 
4979 #undef SSE2_COMPARE_TYPE_INDEX
4980 
4981 #endif
4982 
/* Emits a scan which advances STR_PTR until the code unit at distance
"offset" from it is char1 or char2 (the two may be the same value).

When a match end pointer is in use, STR_END is saved in TMP3, temporarily
lowered to at most match_end + offset + 1 code units, and restored before
leaving. In PCRE2_JIT_COMPLETE mode running out of input jumps to
common->failed_match; in the other modes (where offset must be 0) the scan
simply stops at the end of the emitted sequence.

Arguments:
  common   the compiler state
  char1    first accepted code unit
  char2    second accepted code unit
  offset   distance of the tested code unit from the match start
*/
static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *scan_loop;
struct sljit_jump *is_char1;
struct sljit_jump *end_reached;
PCRE2_UCHAR diff_bits = char1 ^ char2;
BOOL has_match_end = (common->match_end_ptr != 0);

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);

if (has_match_end)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

/* From here on STR_PTR addresses the tested code unit itself. */
if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (has_match_end)
  {
  /* Keep the real STR_END in TMP3 and clamp STR_END to the limit in TMP1. */
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }

#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)

/* Use the SSE2 variant when the CPU provides it. */

if (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
  {
  fast_forward_first_char2_sse2(common, char1, char2, offset);

  /* Undo the initial adjustment of STR_PTR. */
  if (offset > 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

  if (has_match_end)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

scan_loop = LABEL();

end_reached = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, end_reached);

/* Load the next code unit and step over it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (diff_bits == 0)
  {
  /* Only one distinct value to look for. */
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, scan_loop);
  }
else if (is_powerof2(diff_bits))
  {
  /* The values differ in a single bit: force that bit and compare once. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, diff_bits);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | diff_bits, scan_loop);
  }
else
  {
  /* Two independent comparisons are needed. */
  is_char1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, scan_loop);
  JUMPHERE(is_char1);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  /* Re-check the code unit at the would-be match start; continue scanning
  when jumpto_if_not_utf_char_start() rejects it. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
  jumpto_if_not_utf_char_start(compiler, TMP1, scan_loop);
  }
#endif

/* Move STR_PTR back from "after the found code unit" to the match start. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(end_reached);

if (has_match_end)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
5070 
fast_forward_first_n_chars(compiler_common * common)5071 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
5072 {
5073 DEFINE_COMPILER;
5074 struct sljit_label *start;
5075 struct sljit_jump *match;
5076 fast_forward_char_data chars[MAX_N_CHARS];
5077 sljit_s32 offset;
5078 PCRE2_UCHAR mask;
5079 PCRE2_UCHAR *char_set, *char_set_end;
5080 int i, max, from;
5081 int range_right = -1, range_len;
5082 sljit_u8 *update_table = NULL;
5083 BOOL in_range;
5084 sljit_u32 rec_count;
5085 
5086 for (i = 0; i < MAX_N_CHARS; i++)
5087   {
5088   chars[i].count = 0;
5089   chars[i].last_count = 0;
5090   }
5091 
5092 rec_count = 10000;
5093 max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
5094 
5095 if (max < 1)
5096   return FALSE;
5097 
5098 /* Convert last_count to priority. */
5099 for (i = 0; i < max; i++)
5100   {
5101   SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count);
5102 
5103   if (chars[i].count == 1)
5104     {
5105     chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
5106     /* Simplifies algorithms later. */
5107     chars[i].chars[1] = chars[i].chars[0];
5108     }
5109   else if (chars[i].count == 2)
5110     {
5111     SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);
5112 
5113     if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
5114       chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
5115     else
5116       chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
5117     }
5118   else
5119     chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
5120   }
5121 
5122 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND) && !(defined _WIN64)
5123 if (check_fast_forward_char_pair_sse2(common, chars, max))
5124   return TRUE;
5125 #endif
5126 
5127 in_range = FALSE;
5128 /* Prevent compiler "uninitialized" warning */
5129 from = 0;
5130 range_len = 4 /* minimum length */ - 1;
5131 for (i = 0; i <= max; i++)
5132   {
5133   if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
5134     {
5135     range_len = i - from;
5136     range_right = i - 1;
5137     }
5138 
5139   if (i < max && chars[i].count < 255)
5140     {
5141     SLJIT_ASSERT(chars[i].count > 0);
5142     if (!in_range)
5143       {
5144       in_range = TRUE;
5145       from = i;
5146       }
5147     }
5148   else
5149     in_range = FALSE;
5150   }
5151 
5152 if (range_right >= 0)
5153   {
5154   update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
5155   if (update_table == NULL)
5156     return TRUE;
5157   memset(update_table, IN_UCHARS(range_len), 256);
5158 
5159   for (i = 0; i < range_len; i++)
5160     {
5161     SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);
5162 
5163     char_set = chars[range_right - i].chars;
5164     char_set_end = char_set + chars[range_right - i].count;
5165     do
5166       {
5167       if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
5168         update_table[(*char_set) & 0xff] = IN_UCHARS(i);
5169       char_set++;
5170       }
5171     while (char_set < char_set_end);
5172     }
5173   }
5174 
5175 offset = -1;
5176 /* Scan forward. */
5177 for (i = 0; i < max; i++)
5178   {
5179   if (range_right == i)
5180     continue;
5181 
5182   if (offset == -1)
5183     {
5184     if (chars[i].last_count >= 2)
5185       offset = i;
5186     }
5187   else if (chars[offset].last_count < chars[i].last_count)
5188     offset = i;
5189   }
5190 
5191 SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));
5192 
5193 if (range_right < 0)
5194   {
5195   if (offset < 0)
5196     return FALSE;
5197   /* Works regardless the value is 1 or 2. */
5198   fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
5199   return TRUE;
5200   }
5201 
5202 SLJIT_ASSERT(range_right != offset);
5203 
5204 if (common->match_end_ptr != 0)
5205   {
5206   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
5207   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
5208   OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
5209   OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
5210   CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
5211   }
5212 else
5213   OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
5214 
5215 SLJIT_ASSERT(range_right >= 0);
5216 
5217 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
5218 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
5219 #endif
5220 
5221 start = LABEL();
5222 add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5223 
5224 #if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
5225 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
5226 #else
5227 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
5228 #endif
5229 
5230 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
5231 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
5232 #else
5233 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
5234 #endif
5235 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5236 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
5237 
5238 if (offset >= 0)
5239   {
5240   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
5241   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5242 
5243   if (chars[offset].count == 1)
5244     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
5245   else
5246     {
5247     mask = chars[offset].chars[0] ^ chars[offset].chars[1];
5248     if (is_powerof2(mask))
5249       {
5250       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
5251       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
5252       }
5253     else
5254       {
5255       match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
5256       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
5257       JUMPHERE(match);
5258       }
5259     }
5260   }
5261 
5262 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5263 if (common->utf && offset != 0)
5264   {
5265   if (offset < 0)
5266     {
5267     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5268     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5269     }
5270   else
5271     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5272 
5273   jumpto_if_not_utf_char_start(compiler, TMP1, start);
5274 
5275   if (offset < 0)
5276     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5277   }
5278 #endif
5279 
5280 if (offset >= 0)
5281   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5282 
5283 if (common->match_end_ptr != 0)
5284   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
5285 else
5286   OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
5287 return TRUE;
5288 }
5289 
fast_forward_first_char(compiler_common * common)5290 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
5291 {
5292 PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
5293 PCRE2_UCHAR oc;
5294 
5295 oc = first_char;
5296 if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
5297   {
5298   oc = TABLE_GET(first_char, common->fcc, first_char);
5299 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
5300   if (first_char > 127 && common->utf)
5301     oc = UCD_OTHERCASE(first_char);
5302 #endif
5303   }
5304 
5305 fast_forward_first_char2(common, first_char, oc, 0);
5306 }
5307 
fast_forward_newline(compiler_common * common)5308 static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
5309 {
5310 DEFINE_COMPILER;
5311 struct sljit_label *loop;
5312 struct sljit_jump *lastchar;
5313 struct sljit_jump *firstchar;
5314 struct sljit_jump *quit;
5315 struct sljit_jump *foundcr = NULL;
5316 struct sljit_jump *notfoundnl;
5317 jump_list *newline = NULL;
5318 
5319 if (common->match_end_ptr != 0)
5320   {
5321   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
5322   OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
5323   }
5324 
5325 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5326   {
5327   lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5328   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5329   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5330   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5331   firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
5332 
5333   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
5334   OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
5335   OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
5336 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5337   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
5338 #endif
5339   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5340 
5341   loop = LABEL();
5342   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5343   quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5344   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5345   OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5346   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
5347   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
5348 
5349   JUMPHERE(quit);
5350   JUMPHERE(firstchar);
5351   JUMPHERE(lastchar);
5352 
5353   if (common->match_end_ptr != 0)
5354     OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
5355   return;
5356   }
5357 
5358 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5359 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5360 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
5361 skip_char_back(common);
5362 
5363 loop = LABEL();
5364 common->ff_newline_shortcut = loop;
5365 
5366 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5367 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5368 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
5369   foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5370 check_newlinechar(common, common->nltype, &newline, FALSE);
5371 set_jumps(newline, loop);
5372 
5373 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
5374   {
5375   quit = JUMP(SLJIT_JUMP);
5376   JUMPHERE(foundcr);
5377   notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5378   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5379   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
5380   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
5381 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5382   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5383 #endif
5384   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5385   JUMPHERE(notfoundnl);
5386   JUMPHERE(quit);
5387   }
5388 JUMPHERE(lastchar);
5389 JUMPHERE(firstchar);
5390 
5391 if (common->match_end_ptr != 0)
5392   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
5393 }
5394 
5395 static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
5396 
fast_forward_start_bits(compiler_common * common)5397 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
5398 {
5399 DEFINE_COMPILER;
5400 const sljit_u8 *start_bits = common->re->start_bitmap;
5401 struct sljit_label *start;
5402 struct sljit_jump *partial_quit;
5403 #if PCRE2_CODE_UNIT_WIDTH != 8
5404 struct sljit_jump *found = NULL;
5405 #endif
5406 jump_list *matches = NULL;
5407 
5408 if (common->match_end_ptr != 0)
5409   {
5410   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
5411   OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
5412   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5413   OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
5414   CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
5415   }
5416 
5417 start = LABEL();
5418 
5419 partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5420 if (common->mode == PCRE2_JIT_COMPLETE)
5421   add_jump(compiler, &common->failed_match, partial_quit);
5422 
5423 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5424 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5425 
5426 if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
5427   {
5428 #if PCRE2_CODE_UNIT_WIDTH != 8
5429   if ((start_bits[31] & 0x80) != 0)
5430     found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
5431   else
5432     CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
5433 #elif defined SUPPORT_UNICODE
5434   if (common->utf && is_char7_bitset(start_bits, FALSE))
5435     CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
5436 #endif
5437   OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5438   OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5439   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
5440   if (sljit_get_register_index(TMP3) >= 0)
5441     {
5442     OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
5443     OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP3, 0);
5444     }
5445   else
5446     {
5447     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5448     OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5449     }
5450   JUMPTO(SLJIT_ZERO, start);
5451   }
5452 else
5453   set_jumps(matches, start);
5454 
5455 #if PCRE2_CODE_UNIT_WIDTH != 8
5456 if (found != NULL)
5457   JUMPHERE(found);
5458 #endif
5459 
5460 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5461 
5462 if (common->mode != PCRE2_JIT_COMPLETE)
5463   JUMPHERE(partial_quit);
5464 
5465 if (common->match_end_ptr != 0)
5466   OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
5467 }
5468 
search_requested_char(compiler_common * common,PCRE2_UCHAR req_char,BOOL caseless,BOOL has_firstchar)5469 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
5470 {
5471 DEFINE_COMPILER;
5472 struct sljit_label *loop;
5473 struct sljit_jump *toolong;
5474 struct sljit_jump *alreadyfound;
5475 struct sljit_jump *found;
5476 struct sljit_jump *foundoc = NULL;
5477 struct sljit_jump *notfound;
5478 sljit_u32 oc, bit;
5479 
5480 SLJIT_ASSERT(common->req_char_ptr != 0);
5481 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
5482 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_CU_MAX);
5483 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
5484 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
5485 
5486 if (has_firstchar)
5487   OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5488 else
5489   OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
5490 
5491 loop = LABEL();
5492 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
5493 
5494 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
5495 oc = req_char;
5496 if (caseless)
5497   {
5498   oc = TABLE_GET(req_char, common->fcc, req_char);
5499 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
5500   if (req_char > 127 && common->utf)
5501     oc = UCD_OTHERCASE(req_char);
5502 #endif
5503   }
5504 if (req_char == oc)
5505   found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
5506 else
5507   {
5508   bit = req_char ^ oc;
5509   if (is_powerof2(bit))
5510     {
5511     OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
5512     found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
5513     }
5514   else
5515     {
5516     found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
5517     foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
5518     }
5519   }
5520 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5521 JUMPTO(SLJIT_JUMP, loop);
5522 
5523 JUMPHERE(found);
5524 if (foundoc)
5525   JUMPHERE(foundoc);
5526 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
5527 JUMPHERE(alreadyfound);
5528 JUMPHERE(toolong);
5529 return notfound;
5530 }
5531 
/* Emits the frame reverting routine, entered with a fast call whose return
address is kept in RETURN_ADDR.

The routine repeatedly reads the word just below STACK_TOP:
  > 0   it is an offset from the local base; the next two words on the
        stack are written to that location and the one after it, and three
        words are popped
  == 0  the routine returns
  < 0   its negated value is an offset from the local base; the next word on
        the stack is written to that location and two words are popped
*/
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;
/* Signed word size: negating sizeof() directly would negate an unsigned
value and rely on the conversion back to sljit_sw. */
const sljit_sw word_size = (sljit_sw)sizeof(sljit_sw);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
GET_LOCAL_BASE(TMP1, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -word_size);
jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);

/* Positive entry: restore two consecutive words. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (sljit_get_register_index (TMP3) < 0)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * word_size));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), word_size, SLJIT_MEM1(STACK_TOP), -(3 * word_size));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * word_size);
  }
else
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * word_size));
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * word_size));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * word_size);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  /* TMP1 was used as a scratch register: reload the local base. */
  GET_LOCAL_BASE(TMP1, 0, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), word_size, TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);

JUMPHERE(jump);
/* The entry is <= 0 here, so "not zero" means negative. */
jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
/* End of reverting values. */
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Negative entry: restore a single word. */
JUMPHERE(jump);
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (sljit_get_register_index (TMP3) < 0)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * word_size));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * word_size);
  }
else
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * word_size));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * word_size);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);
}
5585 
/* Emits the word boundary test routine, entered with a fast call whose
return address is stored in LOCALS0.

A 0/1 "is word character" flag is computed for the character before STR_PTR
(stored in LOCALS1, left at 0 at the start of the subject) and for the
character at STR_PTR (kept in TMP2, left at 0 at the end of the subject).
The routine returns with the zero flag set from the XOR of the two flags.

With common->use_ucp the flag is 1 for an underscore or when the value left
in TMP1 by the getucd routine lies in the ucp_Ll..ucp_Lu or ucp_Nd..ucp_No
range; otherwise it is the ctype_word bit of the ctypes table entry. */
static void check_wordboundary(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *at_subject_start;
jump_list *at_subject_end = NULL;
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
struct sljit_jump *skip_lookup;
#endif

/* The shifts by 4 below depend on this value. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);

/* Previous character: its flag goes to LOCALS1, which starts out as 0. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
at_subject_start = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
skip_char_back(common);
check_start_used_ptr(common);
read_char(common);

/* Classify the character in TMP1. */
#ifdef SUPPORT_UNICODE
if (common->use_ucp)
  {
  /* TMP2 is preset to 1 for the underscore shortcut. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  skip_lookup = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  /* Two range checks on the returned value, OR-ed together in TMP2. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(skip_lookup);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
  }
else
#endif
  {
  /* Values above 255 skip the table lookup; LOCALS1 then keeps its 0. */
#if PCRE2_CODE_UNIT_WIDTH != 8
  skip_lookup = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  skip_lookup = NULL;
  if (common->utf)
    skip_lookup = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(skip_lookup);
#elif defined SUPPORT_UNICODE
  if (skip_lookup != NULL)
    JUMPHERE(skip_lookup);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
JUMPHERE(at_subject_start);

/* Current character: its flag is built in TMP2, which starts out as 0. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &at_subject_end);
peek_char(common, READ_CHAR_MAX);

/* Same classification as above, with the result left in TMP2. */
#ifdef SUPPORT_UNICODE
if (common->use_ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  skip_lookup = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(skip_lookup);
  }
else
#endif
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* peek_char may have overwritten TMP2, so clear it again. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  skip_lookup = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  skip_lookup = NULL;
  if (common->utf)
    skip_lookup = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(skip_lookup);
#elif defined SUPPORT_UNICODE
  if (skip_lookup != NULL)
    JUMPHERE(skip_lookup);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
set_jumps(at_subject_end, LABEL());

/* Combine the two flags; only the zero flag is produced. */
OP2(SLJIT_XOR | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
}
5694 
optimize_class_ranges(compiler_common * common,const sljit_u8 * bits,BOOL nclass,BOOL invert,jump_list ** backtracks)5695 static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
5696 {
5697 /* May destroy TMP1. */
5698 DEFINE_COMPILER;
5699 int ranges[MAX_CLASS_RANGE_SIZE];
5700 sljit_u8 bit, cbit, all;
5701 int i, byte, length = 0;
5702 
5703 bit = bits[0] & 0x1;
5704 /* All bits will be zero or one (since bit is zero or one). */
5705 all = -bit;
5706 
5707 for (i = 0; i < 256; )
5708   {
5709   byte = i >> 3;
5710   if ((i & 0x7) == 0 && bits[byte] == all)
5711     i += 8;
5712   else
5713     {
5714     cbit = (bits[byte] >> (i & 0x7)) & 0x1;
5715     if (cbit != bit)
5716       {
5717       if (length >= MAX_CLASS_RANGE_SIZE)
5718         return FALSE;
5719       ranges[length] = i;
5720       length++;
5721       bit = cbit;
5722       all = -cbit;
5723       }
5724     i++;
5725     }
5726   }
5727 
5728 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
5729   {
5730   if (length >= MAX_CLASS_RANGE_SIZE)
5731     return FALSE;
5732   ranges[length] = 256;
5733   length++;
5734   }
5735 
5736 if (length < 0 || length > 4)
5737   return FALSE;
5738 
5739 bit = bits[0] & 0x1;
5740 if (invert) bit ^= 0x1;
5741 
5742 /* No character is accepted. */
5743 if (length == 0 && bit == 0)
5744   add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5745 
5746 switch(length)
5747   {
5748   case 0:
5749   /* When bit != 0, all characters are accepted. */
5750   return TRUE;
5751 
5752   case 1:
5753   add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5754   return TRUE;
5755 
5756   case 2:
5757   if (ranges[0] + 1 != ranges[1])
5758     {
5759     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5760     add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5761     }
5762   else
5763     add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5764   return TRUE;
5765 
5766   case 3:
5767   if (bit != 0)
5768     {
5769     add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
5770     if (ranges[0] + 1 != ranges[1])
5771       {
5772       OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5773       add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5774       }
5775     else
5776       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5777     return TRUE;
5778     }
5779 
5780   add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
5781   if (ranges[1] + 1 != ranges[2])
5782     {
5783     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
5784     add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
5785     }
5786   else
5787     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
5788   return TRUE;
5789 
5790   case 4:
5791   if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
5792       && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
5793       && (ranges[1] & (ranges[2] - ranges[0])) == 0
5794       && is_powerof2(ranges[2] - ranges[0]))
5795     {
5796     SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
5797     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
5798     if (ranges[2] + 1 != ranges[3])
5799       {
5800       OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
5801       add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
5802       }
5803     else
5804       add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
5805     return TRUE;
5806     }
5807 
5808   if (bit != 0)
5809     {
5810     i = 0;
5811     if (ranges[0] + 1 != ranges[1])
5812       {
5813       OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5814       add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5815       i = ranges[0];
5816       }
5817     else
5818       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5819 
5820     if (ranges[2] + 1 != ranges[3])
5821       {
5822       OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
5823       add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
5824       }
5825     else
5826       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
5827     return TRUE;
5828     }
5829 
5830   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5831   add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
5832   if (ranges[1] + 1 != ranges[2])
5833     {
5834     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
5835     add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
5836     }
5837   else
5838     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5839   return TRUE;
5840 
5841   default:
5842   SLJIT_UNREACHABLE();
5843   return FALSE;
5844   }
5845 }
5846 
/* Tries to replace a class bitmap test with a short sequence of compares
and conditional moves, which is only attempted when the CPU supports CMOV.

The characters selected by the bitmap (or by its complement when the
effective nclass is set) are collected in char_list. A character which
differs from an already collected one only in bit 0x20 is merged with it:
the entry gets 0x120 or-ed in, where 0x100 serves as the "pair" marker.
If more than MAX_CLASS_CHARS_SIZE entries would be needed, FALSE is
returned before any code is emitted.

The generated code leaves a non-zero value in TMP2 when TMP1 is one of the
collected characters, and the final jump to backtracks depends on it.

May destroy TMP1. */
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
DEFINE_COMPILER;
uint16_t char_list[MAX_CLASS_CHARS_SIZE];
uint8_t map_byte;
int idx, bitpos, slot, c;
int len = 0;
BOOL has_pairs = FALSE;

if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  return FALSE;

if (invert)
  nclass = !nclass;

for (idx = 0; idx < 32; idx++)
  {
  map_byte = nclass ? (uint8_t)~bits[idx] : bits[idx];

  for (bitpos = 0; map_byte != 0; map_byte >>= 1, bitpos++)
    {
    if ((map_byte & 0x1) == 0)
      continue;

    c = idx * 8 + bitpos;
    slot = len;

    /* Look for the partner which has the 0x20 bit cleared. */
    if ((c & 0x20) != 0)
      {
      for (slot = 0; slot < len; slot++)
        if (char_list[slot] == c - 0x20)
          {
          char_list[slot] |= 0x120;
          break;
          }
      }

    /* Merged into an existing entry. */
    if (slot != len)
      continue;

    if (len >= MAX_CLASS_CHARS_SIZE)
      return FALSE;

    char_list[len++] = (uint16_t) c;
    }
  }

if (len == 0) return FALSE;  /* Should never occur, but stops analyzers complaining. */

/* The zero character cannot be detected by moving TMP1 into TMP2 (the
result would be zero), so it is handled by a flag move. It can only be the
first entry, since the list is built in increasing order. */
idx = 0;
if (char_list[0] == 0)
  {
  idx = 1;
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);

/* First pass: entries which stand for a single character. */
for (; idx < len; idx++)
  {
  if ((char_list[idx] & 0x100) != 0)
    {
    has_pairs = TRUE;
    continue;
    }
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[idx]);
  CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
  }

/* Second pass: merged pairs, compared after the 0x20 bit is forced. */
if (has_pairs)
  {
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);

  for (idx = 0; idx < len; idx++)
    {
    if ((char_list[idx] & 0x100) == 0)
      continue;
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[idx] & 0xff);
    CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
    }
  }

add_jump(compiler, backtracks, CMP(nclass ? SLJIT_NOT_EQUAL : SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
return TRUE;
}
5947 
/* Emits an optimized test for a class bitmap if one is available: the range
based form is tried first, and the character list form only when the former
is not applicable. Returns the result of the last attempt, so FALSE means
that the caller has to emit the generic bitmap lookup.

May destroy TMP1. */
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
BOOL done = optimize_class_ranges(common, bits, nclass, invert, backtracks);

if (!done)
  done = optimize_class_chars(common, bits, nclass, invert, backtracks);
return done;
}
5955 
/* Emits a fast-call routine which tests the character in TMP1 against
0x0a-0x0d and 0x85, and additionally against 0x2028 and 0x2029 when the
wide character part below is compiled in (and, in 8 bit mode, utf is set). */
static void check_anynewline(compiler_common *common)
{
/* Check whether TMP1 contains a newline character. TMP2 destroyed.
TMP1 is modified as well: it is biased by 0x0a (and may get its lowest
bit set). The result is accumulated in TMP2, and the last operation also
sets the zero flag according to it. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* After the bias, 0x0a-0x0d becomes 0-3, so one unsigned compare is enough. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
/* The flags of this compare (0x85) are consumed by the next OP_FLAGS,
whichever one that turns out to be. */
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit maps 0x2028 - 0x0a onto 0x2029 - 0x0a, so a
  single compare covers both characters. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merges the result of the last compare and sets the zero flag. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
5982 
/* Emits a fast-call routine which tests the character in TMP1 against
0x09, 0x20 and 0xa0, and additionally against 0x1680, 0x180e, 0x2000-0x200a,
0x202f, 0x205f and 0x3000 when the wide character part below is compiled in
(and, in 8 bit mode, utf is set). */
static void check_hspace(compiler_common *common)
{
/* Check whether TMP1 contains a horizontal space character. TMP2 destroyed.
TMP1 is modified as well on the wide character path (it is biased by
0x2000). The result is accumulated in TMP2, and the last operation also
sets the zero flag according to it. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
/* The flags of this compare (0xa0) are consumed by the next OP_FLAGS,
whichever one that turns out to be. */
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* From here TMP1 is biased by 0x2000, so that 0x2000-0x200a can be
  tested with one unsigned compare. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merges the result of the last compare and sets the zero flag. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
6021 
/* Emits a fast-call routine which tests the character in TMP1 against
0x0a-0x0d and 0x85, and additionally against 0x2028 and 0x2029 when the
wide character part below is compiled in (and, in 8 bit mode, utf is set). */
static void check_vspace(compiler_common *common)
{
/* Check whether TMP1 contains a vertical space character. TMP2 destroyed.
TMP1 is modified as well: it is biased by 0x0a (and may get its lowest
bit set). The result is accumulated in TMP2, and the last operation also
sets the zero flag according to it. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* After the bias, 0x0a-0x0d becomes 0-3, so one unsigned compare is enough. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
/* The flags of this compare (0x85) are consumed by the next OP_FLAGS,
whichever one that turns out to be. */
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit maps 0x2028 - 0x0a onto 0x2029 - 0x0a, so a
  single compare covers both characters. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merges the result of the last compare and sets the zero flag. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
6049 
/* Emits the fast-call routine which compares two code unit sequences
exactly. On entry TMP1 points to the first sequence, STR_PTR to the end of
the second one and TMP2 holds the length in bytes; STR_PTR is moved back by
TMP2 before the loop starts. The loop ends on the first difference or when
TMP2 reaches zero, so the flags of the last operation tell the caller the
result. The return address is kept in LOCALS0.

When TMP3 is not mapped to a machine register, STR_END and STACK_TOP are
borrowed as character registers; their values are parked in TMP3 and
RETURN_ADDR and restored before returning. */
static void do_casefulcmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *mismatch;
struct sljit_label *loop;
int char1_reg = TMP3;
int char2_reg = RETURN_ADDR;
/* 0: plain loads, 1: post-update loads, 2: pre-update loads. */
int mem_mode = 0;

if (sljit_get_register_index(TMP3) < 0)
  {
  char1_reg = STR_END;
  char2_reg = STACK_TOP;
  }

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

if (char1_reg == STR_END)
  {
  OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
  }

/* Probe which addressing form is supported by the target. */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  mem_mode = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  mem_mode = 2;

switch (mem_mode)
  {
  case 1:
  loop = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  /* Pre-update loads advance the pointers first, so both of them are
  moved back by one code unit before the loop. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  loop = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  loop = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

/* Common tail of the loop. */
mismatch = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop);

JUMPHERE(mismatch);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

/* Compensates the adjustment made before the pre-update loop. */
if (mem_mode == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char1_reg == STR_END)
  {
  OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
  }

sljit_emit_fast_return(compiler, TMP1, 0);
}
6129 
/* Emits the fast-call routine which compares two code unit sequences after
mapping every code unit through the common->lcc table (code units above 255
are compared unchanged when the code unit width is not 8). On entry TMP1
points to the first sequence, STR_PTR to the end of the second one and TMP2
holds the length in bytes. The return address is kept in LOCALS0 and the
original value of STR_END in LOCALS1.

STR_END always serves as the first character register. When TMP3 is not
mapped to a machine register, STACK_TOP and STACK_LIMIT are borrowed as well;
their values are parked in TMP3 and RETURN_ADDR and restored at the end. */
static void do_caselesscmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skip;
struct sljit_label *loop;
int char1_reg = STR_END;
int char2_reg = RETURN_ADDR;
int lcc_table = TMP3;
/* 0: plain loads, 1: post-update loads, 2: pre-update loads. */
int mem_mode = 0;

if (sljit_get_register_index(TMP3) < 0)
  {
  char2_reg = STACK_TOP;
  lcc_table = STACK_LIMIT;
  }

/* Probe which addressing form is supported by the target. */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  mem_mode = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  mem_mode = 2;

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);

if (char2_reg == STACK_TOP)
  {
  OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
  }

OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);

switch (mem_mode)
  {
  case 1:
  loop = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  /* Pre-update loads advance the pointers first, so both of them are
  moved back by one code unit before the loop. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  loop = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  /* STR_PTR is advanced later, after the table lookups. */
  loop = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

/* Map both code units through the table; the table is not consulted for
values above 255. */
#if PCRE2_CODE_UNIT_WIDTH != 8
skip = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(skip);
skip = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(skip);
#endif

if (mem_mode == 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

skip = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop);

JUMPHERE(skip);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

/* Compensates the adjustment made before the pre-update loop. */
if (mem_mode == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char2_reg == STACK_TOP)
  {
  OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
  }

OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, TMP1, 0);
}
6227 
6228 #if defined SUPPORT_UNICODE
6229 
do_utf_caselesscmp(PCRE2_SPTR src1,PCRE2_SPTR src2,PCRE2_SPTR end1,PCRE2_SPTR end2)6230 static PCRE2_SPTR SLJIT_FUNC do_utf_caselesscmp(PCRE2_SPTR src1, PCRE2_SPTR src2, PCRE2_SPTR end1, PCRE2_SPTR end2)
6231 {
6232 /* This function would be ineffective to do in JIT level. */
6233 sljit_u32 c1, c2;
6234 const ucd_record *ur;
6235 const sljit_u32 *pp;
6236 
6237 while (src1 < end1)
6238   {
6239   if (src2 >= end2)
6240     return (PCRE2_SPTR)1;
6241   GETCHARINC(c1, src1);
6242   GETCHARINC(c2, src2);
6243   ur = GET_UCD(c2);
6244   if (c1 != c2 && c1 != c2 + ur->other_case)
6245     {
6246     pp = PRIV(ucd_caseless_sets) + ur->caseset;
6247     for (;;)
6248       {
6249       if (c1 < *pp) return NULL;
6250       if (c1 == *pp++) break;
6251       }
6252     }
6253   }
6254 return src2;
6255 }
6256 
6257 #endif /* SUPPORT_UNICODE */
6258 
/* Emits the code which compares the subject with one pattern character
starting at cc. In UTF mode the character may consist of several code units
(1 + GET_EXTRALEN(*cc)); otherwise a single code unit is processed.

The subject is addressed relative to STR_PTR with the negative offset
-context->length, and context->length is decreased by the size of each
processed code unit. The context structure carries the state between
consecutive calls: the register which holds the already loaded subject data
(sourcereg, -1 before the first call) and, when unaligned reads are
available, the code units collected so far (c, oc, ucharptr).

When caseless is set and the character has an other case, the code unit
which differs is compared after the differing bit is forced to one on both
sides. Mismatches are added to backtracks.

Returns cc advanced past the processed code units. */
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
    compare_context *context, jump_list **backtracks)
{
DEFINE_COMPILER;
unsigned int othercasebit = 0;
PCRE2_SPTR othercasechar = NULL;
#ifdef SUPPORT_UNICODE
int utflength;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  othercasebit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(othercasebit);
  /* Extracting bit difference info. The upper part of the value selects the
  code unit (relative to cc) which contains the differing bit, the low 8 bits
  are the bit mask. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  othercasechar = cc + (othercasebit >> 8);
  othercasebit &= 0xff;
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for PCRE2_CODE_UNIT_WIDTH == 32. */
  othercasechar = cc + (othercasebit >> 9);
  /* Bit 0x100 means that the mask belongs to the high byte of the code unit. */
  if ((othercasebit & 0x100) != 0)
    othercasebit = (othercasebit & 0xff) << 8;
  else
    othercasebit &= 0xff;
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  }

/* First call for this context: load the first chunk of the subject into
TMP1. The widest load which fits into the remaining length is used when
unaligned reads are supported. */
if (context->sourcereg == -1)
  {
#if PCRE2_CODE_UNIT_WIDTH == 8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  /* The next load goes into TMP2; the registers are swapped after each load. */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UNICODE
utflength = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  utflength += GET_EXTRALEN(*cc);

/* One iteration per code unit of the character. */
do
  {
#endif

  context->length -= IN_UCHARS(1);
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)

  /* Unaligned read is supported. */
  /* Collect the expected code unit (c) and the case mask (oc); the compare
  is emitted only when a whole chunk has been collected. */
  if (othercasebit != 0 && othercasechar == cc)
    {
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
    context->oc.asuchars[context->ucharptr] = othercasebit;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  context->ucharptr++;

  /* A chunk is complete when it is 4 bytes long, when nothing remains, or
  (8 bit only) when 2 bytes are collected and a single byte remains. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Load the next chunk (if any) into the spare register before the
    current one is compared. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if PCRE2_CODE_UNIT_WIDTH == 8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
    /* Switch to the register which holds the chunk to be compared now. */
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

    /* The number of collected code units selects the width of the compare. */
    switch(context->ucharptr)
      {
      case 4 / sizeof(PCRE2_UCHAR):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(PCRE2_UCHAR):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#if PCRE2_CODE_UNIT_WIDTH == 8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    /* Start collecting the next chunk. */
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported or in 32 bit mode. */
  /* Load the next code unit (if any) into the spare register, then compare
  the one which was loaded previously. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

  if (othercasebit != 0 && othercasechar == cc)
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));

#endif

  cc++;
#ifdef SUPPORT_UNICODE
  utflength--;
  }
while (utflength > 0);
#endif

return cc;
}
6411 
6412 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
6413 
/* Changes the bias applied to typereg to (value): emits an ADD or a SUB of
the difference between (value) and the current typeoffset (nothing is emitted
when they are equal), then records (value) as the new typeoffset. Assuming
typereg holds the original quantity minus typeoffset, it holds the original
quantity minus (value) afterwards.

The macro uses the variables typereg and typeoffset of the expansion site,
and evaluates (value) several times. It expands to two statements (the if
and the assignment), so it must not be the unbraced body of a control
statement. */
#define SET_TYPE_OFFSET(value) \
  if ((value) != typeoffset) \
    { \
    if ((value) < typeoffset) \
      OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
    else \
      OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
    } \
  typeoffset = (value);
6423 
/* Changes the bias applied to TMP1 to (value): emits an ADD or a SUB of the
difference between (value) and the current charoffset (nothing is emitted
when they are equal), then records (value) as the new charoffset. Assuming
TMP1 holds the original character minus charoffset, it holds the original
character minus (value) afterwards.

The macro uses the variable charoffset of the expansion site, and evaluates
(value) several times. It expands to two statements (the if and the
assignment), so it must not be the unbraced body of a control statement. */
#define SET_CHAR_OFFSET(value) \
  if ((value) != charoffset) \
    { \
    if ((value) < charoffset) \
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
    else \
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
    } \
  charoffset = (value);
6433 
6434 static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
6435 
compile_xclass_matchingpath(compiler_common * common,PCRE2_SPTR cc,jump_list ** backtracks)6436 static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
6437 {
6438 DEFINE_COMPILER;
6439 jump_list *found = NULL;
6440 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
6441 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
6442 struct sljit_jump *jump = NULL;
6443 PCRE2_SPTR ccbegin;
6444 int compares, invertcmp, numberofcmps;
6445 #if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
6446 BOOL utf = common->utf;
6447 #endif
6448 
6449 #ifdef SUPPORT_UNICODE
6450 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
6451 BOOL charsaved = FALSE;
6452 int typereg = TMP1;
6453 const sljit_u32 *other_cases;
6454 sljit_uw typeoffset;
6455 #endif
6456 
6457 /* Scanning the necessary info. */
6458 cc++;
6459 ccbegin = cc;
6460 compares = 0;
6461 
6462 if (cc[-1] & XCL_MAP)
6463   {
6464   min = 0;
6465   cc += 32 / sizeof(PCRE2_UCHAR);
6466   }
6467 
6468 while (*cc != XCL_END)
6469   {
6470   compares++;
6471   if (*cc == XCL_SINGLE)
6472     {
6473     cc ++;
6474     GETCHARINCTEST(c, cc);
6475     if (c > max) max = c;
6476     if (c < min) min = c;
6477 #ifdef SUPPORT_UNICODE
6478     needschar = TRUE;
6479 #endif
6480     }
6481   else if (*cc == XCL_RANGE)
6482     {
6483     cc ++;
6484     GETCHARINCTEST(c, cc);
6485     if (c < min) min = c;
6486     GETCHARINCTEST(c, cc);
6487     if (c > max) max = c;
6488 #ifdef SUPPORT_UNICODE
6489     needschar = TRUE;
6490 #endif
6491     }
6492 #ifdef SUPPORT_UNICODE
6493   else
6494     {
6495     SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
6496     cc++;
6497     if (*cc == PT_CLIST)
6498       {
6499       other_cases = PRIV(ucd_caseless_sets) + cc[1];
6500       while (*other_cases != NOTACHAR)
6501         {
6502         if (*other_cases > max) max = *other_cases;
6503         if (*other_cases < min) min = *other_cases;
6504         other_cases++;
6505         }
6506       }
6507     else
6508       {
6509       max = READ_CHAR_MAX;
6510       min = 0;
6511       }
6512 
6513     switch(*cc)
6514       {
6515       case PT_ANY:
6516       /* Any either accepts everything or ignored. */
6517       if (cc[-1] == XCL_PROP)
6518         {
6519         compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
6520         if (list == backtracks)
6521           add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6522         return;
6523         }
6524       break;
6525 
6526       case PT_LAMP:
6527       case PT_GC:
6528       case PT_PC:
6529       case PT_ALNUM:
6530       needstype = TRUE;
6531       break;
6532 
6533       case PT_SC:
6534       needsscript = TRUE;
6535       break;
6536 
6537       case PT_SPACE:
6538       case PT_PXSPACE:
6539       case PT_WORD:
6540       case PT_PXGRAPH:
6541       case PT_PXPRINT:
6542       case PT_PXPUNCT:
6543       needstype = TRUE;
6544       needschar = TRUE;
6545       break;
6546 
6547       case PT_CLIST:
6548       case PT_UCNC:
6549       needschar = TRUE;
6550       break;
6551 
6552       default:
6553       SLJIT_UNREACHABLE();
6554       break;
6555       }
6556     cc += 2;
6557     }
6558 #endif
6559   }
6560 SLJIT_ASSERT(compares > 0);
6561 
6562 /* We are not necessary in utf mode even in 8 bit mode. */
6563 cc = ccbegin;
6564 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
6565 
6566 if ((cc[-1] & XCL_HASPROP) == 0)
6567   {
6568   if ((cc[-1] & XCL_MAP) != 0)
6569     {
6570     jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6571     if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
6572       {
6573       OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6574       OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6575       OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
6576       OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6577       OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6578       add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
6579       }
6580 
6581     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6582     JUMPHERE(jump);
6583 
6584     cc += 32 / sizeof(PCRE2_UCHAR);
6585     }
6586   else
6587     {
6588     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
6589     add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
6590     }
6591   }
6592 else if ((cc[-1] & XCL_MAP) != 0)
6593   {
6594   OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
6595 #ifdef SUPPORT_UNICODE
6596   charsaved = TRUE;
6597 #endif
6598   if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
6599     {
6600 #if PCRE2_CODE_UNIT_WIDTH == 8
6601     jump = NULL;
6602     if (common->utf)
6603 #endif
6604       jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6605 
6606     OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6607     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6608     OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
6609     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6610     OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6611     add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
6612 
6613 #if PCRE2_CODE_UNIT_WIDTH == 8
6614     if (common->utf)
6615 #endif
6616       JUMPHERE(jump);
6617     }
6618 
6619   OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
6620   cc += 32 / sizeof(PCRE2_UCHAR);
6621   }
6622 
6623 #ifdef SUPPORT_UNICODE
6624 if (needstype || needsscript)
6625   {
6626   if (needschar && !charsaved)
6627     OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
6628 
6629 #if PCRE2_CODE_UNIT_WIDTH == 32
6630   if (!common->utf)
6631     {
6632     jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
6633     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
6634     JUMPHERE(jump);
6635     }
6636 #endif
6637 
6638   OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
6639   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
6640   OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
6641   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
6642   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
6643   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
6644   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
6645   OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
6646 
6647   /* Before anything else, we deal with scripts. */
6648   if (needsscript)
6649     {
6650     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
6651     OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6652 
6653     ccbegin = cc;
6654 
6655     while (*cc != XCL_END)
6656       {
6657       if (*cc == XCL_SINGLE)
6658         {
6659         cc ++;
6660         GETCHARINCTEST(c, cc);
6661         }
6662       else if (*cc == XCL_RANGE)
6663         {
6664         cc ++;
6665         GETCHARINCTEST(c, cc);
6666         GETCHARINCTEST(c, cc);
6667         }
6668       else
6669         {
6670         SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
6671         cc++;
6672         if (*cc == PT_SC)
6673           {
6674           compares--;
6675           invertcmp = (compares == 0 && list != backtracks);
6676           if (cc[-1] == XCL_NOTPROP)
6677             invertcmp ^= 0x1;
6678           jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
6679           add_jump(compiler, compares > 0 ? list : backtracks, jump);
6680           }
6681         cc += 2;
6682         }
6683       }
6684 
6685     cc = ccbegin;
6686     }
6687 
6688   if (needschar)
6689     {
6690     OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
6691     }
6692 
6693   if (needstype)
6694     {
6695     if (!needschar)
6696       {
6697       OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
6698       OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6699       }
6700     else
6701       {
6702       OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
6703       OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
6704       typereg = RETURN_ADDR;
6705       }
6706     }
6707   }
6708 #endif
6709 
6710 /* Generating code. */
6711 charoffset = 0;
6712 numberofcmps = 0;
6713 #ifdef SUPPORT_UNICODE
6714 typeoffset = 0;
6715 #endif
6716 
6717 while (*cc != XCL_END)
6718   {
6719   compares--;
6720   invertcmp = (compares == 0 && list != backtracks);
6721   jump = NULL;
6722 
6723   if (*cc == XCL_SINGLE)
6724     {
6725     cc ++;
6726     GETCHARINCTEST(c, cc);
6727 
6728     if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
6729       {
6730       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
6731       OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6732       numberofcmps++;
6733       }
6734     else if (numberofcmps > 0)
6735       {
6736       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
6737       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
6738       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6739       numberofcmps = 0;
6740       }
6741     else
6742       {
6743       jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
6744       numberofcmps = 0;
6745       }
6746     }
6747   else if (*cc == XCL_RANGE)
6748     {
6749     cc ++;
6750     GETCHARINCTEST(c, cc);
6751     SET_CHAR_OFFSET(c);
6752     GETCHARINCTEST(c, cc);
6753 
6754     if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
6755       {
6756       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
6757       OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
6758       numberofcmps++;
6759       }
6760     else if (numberofcmps > 0)
6761       {
6762       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
6763       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
6764       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6765       numberofcmps = 0;
6766       }
6767     else
6768       {
6769       jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
6770       numberofcmps = 0;
6771       }
6772     }
6773 #ifdef SUPPORT_UNICODE
6774   else
6775     {
6776     SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
6777     if (*cc == XCL_NOTPROP)
6778       invertcmp ^= 0x1;
6779     cc++;
6780     switch(*cc)
6781       {
6782       case PT_ANY:
6783       if (!invertcmp)
6784         jump = JUMP(SLJIT_JUMP);
6785       break;
6786 
6787       case PT_LAMP:
6788       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
6789       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6790       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
6791       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6792       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
6793       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
6794       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6795       break;
6796 
6797       case PT_GC:
6798       c = PRIV(ucp_typerange)[(int)cc[1] * 2];
6799       SET_TYPE_OFFSET(c);
6800       jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
6801       break;
6802 
6803       case PT_PC:
6804       jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
6805       break;
6806 
6807       case PT_SC:
6808       compares++;
6809       /* Do nothing. */
6810       break;
6811 
6812       case PT_SPACE:
6813       case PT_PXSPACE:
6814       SET_CHAR_OFFSET(9);
6815       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
6816       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6817 
6818       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
6819       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6820 
6821       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
6822       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6823 
6824       SET_TYPE_OFFSET(ucp_Zl);
6825       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
6826       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
6827       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6828       break;
6829 
6830       case PT_WORD:
6831       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
6832       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6833       /* Fall through. */
6834 
6835       case PT_ALNUM:
6836       SET_TYPE_OFFSET(ucp_Ll);
6837       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
6838       OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
6839       SET_TYPE_OFFSET(ucp_Nd);
6840       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
6841       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
6842       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6843       break;
6844 
6845       case PT_CLIST:
6846       other_cases = PRIV(ucd_caseless_sets) + cc[1];
6847 
6848       /* At least three characters are required.
6849          Otherwise this case would be handled by the normal code path. */
6850       SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
6851       SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
6852 
6853       /* Optimizing character pairs, if their difference is power of 2. */
6854       if (is_powerof2(other_cases[1] ^ other_cases[0]))
6855         {
6856         if (charoffset == 0)
6857           OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
6858         else
6859           {
6860           OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
6861           OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
6862           }
6863         OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
6864         OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6865         other_cases += 2;
6866         }
6867       else if (is_powerof2(other_cases[2] ^ other_cases[1]))
6868         {
6869         if (charoffset == 0)
6870           OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
6871         else
6872           {
6873           OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
6874           OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
6875           }
6876         OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
6877         OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6878 
6879         OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
6880         OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
6881 
6882         other_cases += 3;
6883         }
6884       else
6885         {
6886         OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
6887         OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6888         }
6889 
6890       while (*other_cases != NOTACHAR)
6891         {
6892         OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
6893         OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
6894         }
6895       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6896       break;
6897 
6898       case PT_UCNC:
6899       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
6900       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6901       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
6902       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6903       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
6904       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6905 
6906       SET_CHAR_OFFSET(0xa0);
6907       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
6908       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
6909       SET_CHAR_OFFSET(0);
6910       OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
6911       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
6912       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6913       break;
6914 
6915       case PT_PXGRAPH:
6916       /* C and Z groups are the farthest two groups. */
6917       SET_TYPE_OFFSET(ucp_Ll);
6918       OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
6919       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
6920 
6921       jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
6922 
6923       /* In case of ucp_Cf, we overwrite the result. */
6924       SET_CHAR_OFFSET(0x2066);
6925       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
6926       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6927 
6928       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
6929       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6930 
6931       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
6932       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6933 
6934       JUMPHERE(jump);
6935       jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
6936       break;
6937 
6938       case PT_PXPRINT:
6939       /* C and Z groups are the farthest two groups. */
6940       SET_TYPE_OFFSET(ucp_Ll);
6941       OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
6942       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
6943 
6944       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
6945       OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);
6946 
6947       jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
6948 
6949       /* In case of ucp_Cf, we overwrite the result. */
6950       SET_CHAR_OFFSET(0x2066);
6951       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
6952       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6953 
6954       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
6955       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6956 
6957       JUMPHERE(jump);
6958       jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
6959       break;
6960 
6961       case PT_PXPUNCT:
6962       SET_TYPE_OFFSET(ucp_Sc);
6963       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
6964       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6965 
6966       SET_CHAR_OFFSET(0);
6967       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
6968       OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
6969 
6970       SET_TYPE_OFFSET(ucp_Pc);
6971       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
6972       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
6973       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6974       break;
6975 
6976       default:
6977       SLJIT_UNREACHABLE();
6978       break;
6979       }
6980     cc += 2;
6981     }
6982 #endif
6983 
6984   if (jump != NULL)
6985     add_jump(compiler, compares > 0 ? list : backtracks, jump);
6986   }
6987 
6988 if (found != NULL)
6989   set_jumps(found, LABEL());
6990 }
6991 
6992 #undef SET_TYPE_OFFSET
6993 #undef SET_CHAR_OFFSET
6994 
6995 #endif
6996 
/* Emits the matching-path code for one of the assertion-style opcodes
   OP_SOD, OP_SOM, OP_WORD_BOUNDARY, OP_NOT_WORD_BOUNDARY, OP_EODN, OP_EOD,
   OP_DOLL, OP_DOLLM, OP_CIRC, OP_CIRCM and OP_REVERSE.

   Arguments:
     common      compiler state; the fields read here are nltype, newline,
                 nlmin, nlmax, mode, endonly, alt_circumflex and utf, plus
                 the wordboundary and anynewline jump lists
     type        opcode that selects which check is emitted
     cc          pattern pointer; only OP_REVERSE reads from it (GET(cc, 0))
     backtracks  jump list that receives every jump emitted for a check
                 that does not hold

   Returns: cc + LINK_SIZE for OP_REVERSE, cc unchanged for the other
   opcodes. Any opcode not listed above reaches SLJIT_UNREACHABLE(). */
compile_simple_assertion_matchingpath(compiler_common * common,PCRE2_UCHAR type,PCRE2_SPTR cc,jump_list ** backtracks)6997 static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
6998 {
6999 DEFINE_COMPILER;
     /* Lookbehind length read from the pattern; used by OP_REVERSE only. */
7000 int length;
     /* Forward jumps of the current case; each one is patched by JUMPHERE(). */
7001 struct sljit_jump *jump[4];
7002 #ifdef SUPPORT_UNICODE
     /* Loop head of the character-by-character step back in OP_REVERSE. */
7003 struct sljit_label *label;
7004 #endif /* SUPPORT_UNICODE */
7005 
7006 switch(type)
7007   {
7008   case OP_SOD:
       /* Backtrack unless STR_PTR equals the begin field of jit_arguments. */
7009   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7010   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
7011   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
7012   return cc;
7013 
7014   case OP_SOM:
       /* Backtrack unless STR_PTR equals the str field of jit_arguments. */
7015   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7016   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
7017   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
7018   return cc;
7019 
7020   case OP_NOT_WORD_BOUNDARY:
7021   case OP_WORD_BOUNDARY:
       /* Fast-call the shared routine collected in common->wordboundary (its
          body is not visible here), declare the Z flag as set afterwards, and
          test it: OP_WORD_BOUNDARY backtracks on zero, OP_NOT_WORD_BOUNDARY
          backtracks on non-zero. */
7022   add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
7023   sljit_set_current_flags(compiler, SLJIT_SET_Z);
7024   add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
7025   return cc;
7026 
7027   case OP_EODN:
7028   /* Requires rather complex checks. */
       /* jump[0]: STR_PTR is already at or past STR_END, so every newline
          test below is skipped and control lands just before the final
          check_partial() call. */
7029   jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
7030   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
7031     {
         /* Fixed newline made of two code units: TMP2 = STR_PTR + 2 units,
            TMP1 = the unit at STR_PTR. */
7032     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
7033     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
7034     if (common->mode == PCRE2_JIT_COMPLETE)
7035       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
7036     else
7037       {
           /* jump[1]: exactly two units remain, go on to compare both.
              Otherwise TMP2 is rebuilt as
                (STR_PTR + 2 units < STR_END) | (TMP1 != first newline unit)
              and a non-zero value backtracks. Falling through therefore
              means that a single unit remains and it equals the first
              newline unit: check_partial(common, TRUE) is emitted, followed
              by an unconditional backtrack. */
7038       jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
7039       OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
7040       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
7041       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
7042       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
7043       add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
7044       check_partial(common, TRUE);
7045       add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7046       JUMPHERE(jump[1]);
7047       }
         /* Both remaining units must match the two newline units. */
7048     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7049     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
7050     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
7051     }
7052   else if (common->nltype == NLTYPE_FIXED)
7053     {
         /* Fixed single-unit newline: exactly one unit must remain and it
            must equal common->newline. */
7054     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7055     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
7056     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
7057     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
7058     }
7059   else
7060     {
         /* Remaining newline types. jump[1] leaves when the unit at STR_PTR
            is not CR. For CR, STR_PTR + 2 units is compared with STR_END:
            jump[2] when it is greater (CR is the last unit), backtrack when
            it is less, and when equal the second unit must be NL (jump[3])
            or the check backtracks. */
7061     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
7062     jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
7063     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
7064     OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
7065     jump[2] = JUMP(SLJIT_GREATER);
7066     add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
7067     /* Equal. */
7068     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7069     jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
7070     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7071 
         /* The unit at STR_PTR (still in TMP1) is not CR. */
7072     JUMPHERE(jump[1]);
7073     if (common->nltype == NLTYPE_ANYCRLF)
7074       {
           /* Backtrack when more than one unit remains or the unit is not NL. */
7075       OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7076       add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
7077       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
7078       }
7079     else
7080       {
           /* STR_PTR is saved in LOCALS1, one character is read, and the
              check backtracks unless that read ended exactly at STR_END and
              the fast-called common->anynewline routine leaves a non-zero
              result. STR_PTR is restored on the success path only. */
7081       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
7082       read_char_range(common, common->nlmin, common->nlmax, TRUE);
7083       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
7084       add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
7085       sljit_set_current_flags(compiler, SLJIT_SET_Z);
7086       add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
7087       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
7088       }
7089     JUMPHERE(jump[2]);
7090     JUMPHERE(jump[3]);
7091     }
7092   JUMPHERE(jump[0]);
7093   check_partial(common, FALSE);
7094   return cc;
7095 
7096   case OP_EOD:
       /* Backtrack while STR_PTR is still before STR_END. */
7097   add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
7098   check_partial(common, FALSE);
7099   return cc;
7100 
7101   case OP_DOLL:
       /* Backtrack when PCRE2_NOTEOL is set in the options field of
          jit_arguments. */
7102   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
7103   OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
7104   add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
7105 
       /* Without common->endonly the OP_EODN code above is reused through a
          recursive call; with it the same test as OP_EOD is emitted. */
7106   if (!common->endonly)
7107     compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
7108   else
7109     {
7110     add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
7111     check_partial(common, FALSE);
7112     }
7113   return cc;
7114 
7115   case OP_DOLLM:
       /* jump[1]: STR_PTR is before STR_END, continue with the newline test.
          At the end of the subject the check backtracks when PCRE2_NOTEOL is
          set; otherwise check_partial() is emitted and jump[0] skips the
          newline test. */
7116   jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
7117   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
7118   OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
7119   add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
7120   check_partial(common, FALSE);
7121   jump[0] = JUMP(SLJIT_JUMP);
7122   JUMPHERE(jump[1]);
7123 
7124   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
7125     {
         /* Two-unit fixed newline: TMP2 = STR_PTR + 2 units, TMP1 = the unit
            at STR_PTR. */
7126     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
7127     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
7128     if (common->mode == PCRE2_JIT_COMPLETE)
7129       add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
7130     else
7131       {
           /* jump[1]: both units are inside the subject. Otherwise only one
              unit remains: backtrack if it is not the first newline unit,
              else emit check_partial(common, TRUE) and backtrack. */
7132       jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
7133       /* STR_PTR = STR_END - IN_UCHARS(1) */
7134       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
7135       check_partial(common, TRUE);
7136       add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7137       JUMPHERE(jump[1]);
7138       }
7139 
         /* The two units at STR_PTR must match the two newline units. */
7140     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7141     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
7142     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
7143     }
7144   else
7145     {
         /* Other newline settings are delegated to peek_char() and
            check_newlinechar(), both defined outside this view. */
7146     peek_char(common, common->nlmax);
7147     check_newlinechar(common, common->nltype, backtracks, FALSE);
7148     }
7149   JUMPHERE(jump[0]);
7150   return cc;
7151 
7152   case OP_CIRC:
       /* Backtrack when STR_PTR is past the begin field of jit_arguments or
          when PCRE2_NOTBOL is set in the options field. */
7153   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
7154   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
7155   add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
7156   OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
7157   add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
7158   return cc;
7159 
7160   case OP_CIRCM:
       /* jump[1]: STR_PTR is past begin, continue with the test of the
          preceding newline. At begin the check backtracks when PCRE2_NOTBOL
          is set; otherwise jump[0] skips the newline test. TMP1 keeps the
          begin pointer for the bounds test below. */
7161   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
7162   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
7163   jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
7164   OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
7165   add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
7166   jump[0] = JUMP(SLJIT_JUMP);
7167   JUMPHERE(jump[1]);
7168 
       /* Unless common->alt_circumflex is set, a position at or past STR_END
          backtracks. */
7169   if (!common->alt_circumflex)
7170     add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
7171 
7172   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
7173     {
         /* Two-unit fixed newline: backtrack when fewer than two units lie
            between begin (TMP1) and STR_PTR, then compare the units at
            offsets -2 and -1 with the two newline units. */
7174     OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
7175     add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
7176     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
7177     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
7178     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
7179     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
7180     }
7181   else
7182     {
         /* Other newline settings: step back one character, read it again
            and hand it to check_newlinechar() (helpers defined outside this
            view). */
7183     skip_char_back(common);
7184     read_char_range(common, common->nlmin, common->nlmax, TRUE);
7185     check_newlinechar(common, common->nltype, backtracks, FALSE);
7186     }
7187   JUMPHERE(jump[0]);
7188   return cc;
7189 
7190   case OP_REVERSE:
       /* The length comes from the pattern via GET(cc, 0); a zero length
          emits no code at all. */
7191   length = GET(cc, 0);
7192   if (length == 0)
7193     return cc + LINK_SIZE;
7194   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7195 #ifdef SUPPORT_UNICODE
7196   if (common->utf)
7197     {
         /* UTF: a generated loop calls skip_char_back() `length` times and
            backtracks as soon as STR_PTR is at or before begin (TMP3) before
            a step. TMP2 is the loop counter. */
7198     OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
7199     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
7200     label = LABEL();
7201     add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
7202     skip_char_back(common);
7203     OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
7204     JUMPTO(SLJIT_NOT_ZERO, label);
7205     }
7206   else
7207 #endif
7208     {
         /* Non-UTF: subtract `length` code units from STR_PTR in one step
            and backtrack when the result lies before begin. */
7209     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
7210     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
7211     add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
7212     }
7213   check_start_used_ptr(common);
7214   return cc + LINK_SIZE;
7215   }
     /* Every handled case returns from inside the switch. */
7216 SLJIT_UNREACHABLE();
7217 return cc;
7218 }
7219 
7220 #ifdef SUPPORT_UNICODE
7221 
7222 #if PCRE2_CODE_UNIT_WIDTH != 32
7223 
do_extuni_utf(jit_arguments * args,PCRE2_SPTR cc)7224 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
7225 {
7226 PCRE2_SPTR start_subject = args->begin;
7227 PCRE2_SPTR end_subject = args->end;
7228 int lgb, rgb, len, ricount;
7229 PCRE2_SPTR prevcc, bptr;
7230 uint32_t c;
7231 
7232 prevcc = cc;
7233 GETCHARINC(c, cc);
7234 lgb = UCD_GRAPHBREAK(c);
7235 
7236 while (cc < end_subject)
7237   {
7238   len = 1;
7239   GETCHARLEN(c, cc, len);
7240   rgb = UCD_GRAPHBREAK(c);
7241 
7242   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
7243 
7244   /* Not breaking between Regional Indicators is allowed only if there
7245   are an even number of preceding RIs. */
7246 
7247   if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
7248     {
7249     ricount = 0;
7250     bptr = prevcc;
7251 
7252     /* bptr is pointing to the left-hand character */
7253     while (bptr > start_subject)
7254       {
7255       bptr--;
7256       BACKCHAR(bptr);
7257       GETCHAR(c, bptr);
7258 
7259       if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
7260 
7261       ricount++;
7262       }
7263 
7264     if ((ricount & 1) != 0) break;  /* Grapheme break required */
7265     }
7266 
7267   /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
7268   allows any number of them before a following Extended_Pictographic. */
7269 
7270   if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
7271        lgb != ucp_gbExtended_Pictographic)
7272     lgb = rgb;
7273 
7274   prevcc = cc;
7275   cc += len;
7276   }
7277 
7278 return cc;
7279 }
7280 
7281 #endif
7282 
do_extuni_no_utf(jit_arguments * args,PCRE2_SPTR cc)7283 static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
7284 {
7285 PCRE2_SPTR start_subject = args->begin;
7286 PCRE2_SPTR end_subject = args->end;
7287 int lgb, rgb, ricount;
7288 PCRE2_SPTR bptr;
7289 uint32_t c;
7290 
7291 GETCHARINC(c, cc);
7292 lgb = UCD_GRAPHBREAK(c);
7293 
7294 while (cc < end_subject)
7295   {
7296   c = *cc;
7297   rgb = UCD_GRAPHBREAK(c);
7298 
7299   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
7300 
7301   /* Not breaking between Regional Indicators is allowed only if there
7302   are an even number of preceding RIs. */
7303 
7304   if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
7305     {
7306     ricount = 0;
7307     bptr = cc - 1;
7308 
7309     /* bptr is pointing to the left-hand character */
7310     while (bptr > start_subject)
7311       {
7312       bptr--;
7313       c = *bptr;
7314 
7315       if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
7316 
7317       ricount++;
7318       }
7319 
7320     if ((ricount & 1) != 0) break;  /* Grapheme break required */
7321     }
7322 
7323   /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
7324   allows any number of them before a following Extended_Pictographic. */
7325 
7326   if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
7327        lgb != ucp_gbExtended_Pictographic)
7328     lgb = rgb;
7329 
7330   cc++;
7331   }
7332 
7333 return cc;
7334 }
7335 
7336 #endif
7337 
compile_char1_matchingpath(compiler_common * common,PCRE2_UCHAR type,PCRE2_SPTR cc,jump_list ** backtracks,BOOL check_str_ptr)7338 static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
7339 {
7340 DEFINE_COMPILER;
7341 int length;
7342 unsigned int c, oc, bit;
7343 compare_context context;
7344 struct sljit_jump *jump[3];
7345 jump_list *end_list;
7346 #ifdef SUPPORT_UNICODE
7347 PCRE2_UCHAR propdata[5];
7348 #endif /* SUPPORT_UNICODE */
7349 
7350 switch(type)
7351   {
7352   case OP_NOT_DIGIT:
7353   case OP_DIGIT:
7354   /* Digits are usually 0-9, so it is worth to optimize them. */
7355   if (check_str_ptr)
7356     detect_partial_match(common, backtracks);
7357 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
7358   if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
7359     read_char7_type(common, type == OP_NOT_DIGIT);
7360   else
7361 #endif
7362     read_char8_type(common, type == OP_NOT_DIGIT);
7363     /* Flip the starting bit in the negative case. */
7364   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
7365   add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
7366   return cc;
7367 
7368   case OP_NOT_WHITESPACE:
7369   case OP_WHITESPACE:
7370   if (check_str_ptr)
7371     detect_partial_match(common, backtracks);
7372 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
7373   if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
7374     read_char7_type(common, type == OP_NOT_WHITESPACE);
7375   else
7376 #endif
7377     read_char8_type(common, type == OP_NOT_WHITESPACE);
7378   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
7379   add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
7380   return cc;
7381 
7382   case OP_NOT_WORDCHAR:
7383   case OP_WORDCHAR:
7384   if (check_str_ptr)
7385     detect_partial_match(common, backtracks);
7386 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
7387   if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
7388     read_char7_type(common, type == OP_NOT_WORDCHAR);
7389   else
7390 #endif
7391     read_char8_type(common, type == OP_NOT_WORDCHAR);
7392   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
7393   add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
7394   return cc;
7395 
7396   case OP_ANY:
7397   if (check_str_ptr)
7398     detect_partial_match(common, backtracks);
7399   read_char_range(common, common->nlmin, common->nlmax, TRUE);
7400   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
7401     {
7402     jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
7403     end_list = NULL;
7404     if (common->mode != PCRE2_JIT_PARTIAL_HARD)
7405       add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
7406     else
7407       check_str_end(common, &end_list);
7408 
7409     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
7410     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
7411     set_jumps(end_list, LABEL());
7412     JUMPHERE(jump[0]);
7413     }
7414   else
7415     check_newlinechar(common, common->nltype, backtracks, TRUE);
7416   return cc;
7417 
7418   case OP_ALLANY:
7419   if (check_str_ptr)
7420     detect_partial_match(common, backtracks);
7421 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
7422   if (common->utf)
7423     {
7424     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
7425     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7426 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
7427 #if PCRE2_CODE_UNIT_WIDTH == 8
7428     jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
7429     OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
7430     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
7431 #elif PCRE2_CODE_UNIT_WIDTH == 16
7432     jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
7433     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
7434     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
7435     OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
7436     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
7437     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
7438 #endif
7439     JUMPHERE(jump[0]);
7440 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
7441     return cc;
7442     }
7443 #endif
7444   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7445   return cc;
7446 
7447   case OP_ANYBYTE:
7448   if (check_str_ptr)
7449     detect_partial_match(common, backtracks);
7450   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7451   return cc;
7452 
7453 #ifdef SUPPORT_UNICODE
7454   case OP_NOTPROP:
7455   case OP_PROP:
7456   propdata[0] = XCL_HASPROP;
7457   propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
7458   propdata[2] = cc[0];
7459   propdata[3] = cc[1];
7460   propdata[4] = XCL_END;
7461   if (check_str_ptr)
7462     detect_partial_match(common, backtracks);
7463   compile_xclass_matchingpath(common, propdata, backtracks);
7464   return cc + 2;
7465 #endif
7466 
7467   case OP_ANYNL:
7468   if (check_str_ptr)
7469     detect_partial_match(common, backtracks);
7470   read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
7471   jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
7472   /* We don't need to handle soft partial matching case. */
7473   end_list = NULL;
7474   if (common->mode != PCRE2_JIT_PARTIAL_HARD)
7475     add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
7476   else
7477     check_str_end(common, &end_list);
7478   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
7479   jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
7480   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7481   jump[2] = JUMP(SLJIT_JUMP);
7482   JUMPHERE(jump[0]);
7483   check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
7484   set_jumps(end_list, LABEL());
7485   JUMPHERE(jump[1]);
7486   JUMPHERE(jump[2]);
7487   return cc;
7488 
7489   case OP_NOT_HSPACE:
7490   case OP_HSPACE:
7491   if (check_str_ptr)
7492     detect_partial_match(common, backtracks);
7493   read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
7494   add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
7495   sljit_set_current_flags(compiler, SLJIT_SET_Z);
7496   add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
7497   return cc;
7498 
7499   case OP_NOT_VSPACE:
7500   case OP_VSPACE:
7501   if (check_str_ptr)
7502     detect_partial_match(common, backtracks);
7503   read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
7504   add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
7505   sljit_set_current_flags(compiler, SLJIT_SET_Z);
7506   add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
7507   return cc;
7508 
7509 #ifdef SUPPORT_UNICODE
7510   case OP_EXTUNI:
7511   if (check_str_ptr)
7512     detect_partial_match(common, backtracks);
7513 
7514   SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
7515   OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
7516 
7517 #if PCRE2_CODE_UNIT_WIDTH != 32
7518   sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
7519       common->utf ? SLJIT_FUNC_OFFSET(do_extuni_utf) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
7520 #else
7521   sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_extuni_no_utf));
7522 #endif
7523 
7524   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
7525 
7526   if (common->mode == PCRE2_JIT_PARTIAL_HARD)
7527     {
7528     jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
7529     /* Since we successfully read a char above, partial matching must occure. */
7530     check_partial(common, TRUE);
7531     JUMPHERE(jump[0]);
7532     }
7533   return cc;
7534 #endif
7535 
7536   case OP_CHAR:
7537   case OP_CHARI:
7538   length = 1;
7539 #ifdef SUPPORT_UNICODE
7540   if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
7541 #endif
7542   if (common->mode == PCRE2_JIT_COMPLETE && check_str_ptr
7543       && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
7544     {
7545     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
7546     add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
7547 
7548     context.length = IN_UCHARS(length);
7549     context.sourcereg = -1;
7550 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
7551     context.ucharptr = 0;
7552 #endif
7553     return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
7554     }
7555 
7556   if (check_str_ptr)
7557     detect_partial_match(common, backtracks);
7558 #ifdef SUPPORT_UNICODE
7559   if (common->utf)
7560     {
7561     GETCHAR(c, cc);
7562     }
7563   else
7564 #endif
7565     c = *cc;
7566 
7567   if (type == OP_CHAR || !char_has_othercase(common, cc))
7568     {
7569     read_char_range(common, c, c, FALSE);
7570     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
7571     return cc + length;
7572     }
7573   oc = char_othercase(common, c);
7574   read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
7575   bit = c ^ oc;
7576   if (is_powerof2(bit))
7577     {
7578     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
7579     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
7580     return cc + length;
7581     }
7582   jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
7583   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
7584   JUMPHERE(jump[0]);
7585   return cc + length;
7586 
7587   case OP_NOT:
7588   case OP_NOTI:
7589   if (check_str_ptr)
7590     detect_partial_match(common, backtracks);
7591 
7592   length = 1;
7593 #ifdef SUPPORT_UNICODE
7594   if (common->utf)
7595     {
7596 #if PCRE2_CODE_UNIT_WIDTH == 8
7597     c = *cc;
7598     if (c < 128)
7599       {
7600       OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
7601       if (type == OP_NOT || !char_has_othercase(common, cc))
7602         add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
7603       else
7604         {
7605         /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
7606         OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
7607         add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
7608         }
7609       /* Skip the variable-length character. */
7610       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7611       jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
7612       OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
7613       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
7614       JUMPHERE(jump[0]);
7615       return cc + 1;
7616       }
7617     else
7618 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7619       {
7620       GETCHARLEN(c, cc, length);
7621       }
7622     }
7623   else
7624 #endif /* SUPPORT_UNICODE */
7625     c = *cc;
7626 
7627   if (type == OP_NOT || !char_has_othercase(common, cc))
7628     {
7629     read_char_range(common, c, c, TRUE);
7630     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
7631     }
7632   else
7633     {
7634     oc = char_othercase(common, c);
7635     read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
7636     bit = c ^ oc;
7637     if (is_powerof2(bit))
7638       {
7639       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
7640       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
7641       }
7642     else
7643       {
7644       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
7645       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
7646       }
7647     }
7648   return cc + length;
7649 
7650   case OP_CLASS:
7651   case OP_NCLASS:
7652   if (check_str_ptr)
7653     detect_partial_match(common, backtracks);
7654 
7655 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
7656   bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
7657   read_char_range(common, 0, bit, type == OP_NCLASS);
7658 #else
7659   read_char_range(common, 0, 255, type == OP_NCLASS);
7660 #endif
7661 
7662   if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
7663     return cc + 32 / sizeof(PCRE2_UCHAR);
7664 
7665 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
7666   jump[0] = NULL;
7667   if (common->utf)
7668     {
7669     jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
7670     if (type == OP_CLASS)
7671       {
7672       add_jump(compiler, backtracks, jump[0]);
7673       jump[0] = NULL;
7674       }
7675     }
7676 #elif PCRE2_CODE_UNIT_WIDTH != 8
7677   jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7678   if (type == OP_CLASS)
7679     {
7680     add_jump(compiler, backtracks, jump[0]);
7681     jump[0] = NULL;
7682     }
7683 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
7684 
7685   OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7686   OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7687   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7688   OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7689   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
7690   add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
7691 
7692 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
7693   if (jump[0] != NULL)
7694     JUMPHERE(jump[0]);
7695 #endif
7696   return cc + 32 / sizeof(PCRE2_UCHAR);
7697 
7698 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7699   case OP_XCLASS:
7700   if (check_str_ptr)
7701     detect_partial_match(common, backtracks);
7702   compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
7703   return cc + GET(cc, 0) - 1;
7704 #endif
7705   }
7706 SLJIT_UNREACHABLE();
7707 return cc;
7708 }
7709 
compile_charn_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,jump_list ** backtracks)7710 static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
7711 {
7712 /* This function consumes at least one input character. */
7713 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
7714 DEFINE_COMPILER;
7715 PCRE2_SPTR ccbegin = cc;
7716 compare_context context;
7717 int size;
7718 
7719 context.length = 0;
7720 do
7721   {
7722   if (cc >= ccend)
7723     break;
7724 
7725   if (*cc == OP_CHAR)
7726     {
7727     size = 1;
7728 #ifdef SUPPORT_UNICODE
7729     if (common->utf && HAS_EXTRALEN(cc[1]))
7730       size += GET_EXTRALEN(cc[1]);
7731 #endif
7732     }
7733   else if (*cc == OP_CHARI)
7734     {
7735     size = 1;
7736 #ifdef SUPPORT_UNICODE
7737     if (common->utf)
7738       {
7739       if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
7740         size = 0;
7741       else if (HAS_EXTRALEN(cc[1]))
7742         size += GET_EXTRALEN(cc[1]);
7743       }
7744     else
7745 #endif
7746     if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
7747       size = 0;
7748     }
7749   else
7750     size = 0;
7751 
7752   cc += 1 + size;
7753   context.length += IN_UCHARS(size);
7754   }
7755 while (size > 0 && context.length <= 128);
7756 
7757 cc = ccbegin;
7758 if (context.length > 0)
7759   {
7760   /* We have a fixed-length byte sequence. */
7761   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
7762   add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
7763 
7764   context.sourcereg = -1;
7765 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
7766   context.ucharptr = 0;
7767 #endif
7768   do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
7769   return cc;
7770   }
7771 
7772 /* A non-fixed length character will be checked if length == 0. */
7773 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
7774 }
7775 
7776 /* Forward definitions. */
7777 static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
7778 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
7779 
/* Allocates a zero-filled backtrack record of "size" bytes with
sljit_alloc_memory(), stores "ccstart" in it and links it in front of
parent->top. The macro relies on the "compiler", "backtrack" and "parent"
variables of the enclosing function, and makes that function return "error"
when the compiler reports an error after the allocation. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
7792 
/* Variant of the backtrack allocation for functions returning void:
allocates a zero-filled record of "size" bytes with sljit_alloc_memory(),
stores "ccstart" in it and links it in front of parent->top. Relies on the
"compiler", "backtrack" and "parent" variables of the enclosing function,
which simply returns when the compiler reports an error. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
7805 
/* Views the "backtrack" variable of the enclosing function as a pointer to
the given record type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
7807 
/* Emits the group selection code of OP_DNREF / OP_DNREFI. The name table
entries of the reference are visited in order; for each one TMP2 receives the
address of OVECTOR(offset), and the search stops at the first entry whose
first ovector slot differs from the value kept in OVECTOR(1). The OVECTOR
address of the selected entry is therefore left in TMP2. When every candidate
equals OVECTOR(1), TMP2 addresses the last one; in that case a jump is added
to backtracks, provided backtracks is not NULL and common->unset_backref is
not set. */
static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
PCRE2_SPTR entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
int remaining;
unsigned int offset;
jump_list *found = NULL;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* Every candidate except the last one leaves through the "found" list as
soon as its slot differs from TMP1. */
for (remaining = GET2(cc, 1 + IMM2_SIZE) - 1; remaining > 0; remaining--)
  {
  offset = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
  add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
  entry += common->name_entry_size;
  }

/* The last candidate is selected unconditionally; only the optional failure
check is emitted for it. */
offset = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
if (backtracks != NULL && !common->unset_backref)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));

set_jumps(found, LABEL());
}
7837 
/* Emits the code that matches one occurrence of a back reference.

For OP_REF / OP_REFI the ovector pair is addressed through the group number
stored in the pattern. For the other opcodes TMP2 must already contain the
address of the pair (see compile_dnref_search()).

  withchecks  emit the unset group test (skipped when common->unset_backref
              is set) and the test for an empty captured string
  emptyfail   when the empty string test is emitted: TRUE sends an empty
              capture to backtracks, FALSE lets it continue after the
              emitted code

Failures are added to the backtracks list. */
static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
BOOL by_number = (*cc == OP_REF || *cc == OP_REFI);
int offset = 0;
struct sljit_jump *empty = NULL;
struct sljit_jump *overrun;
struct sljit_jump *finished;

/* TMP1 = start of the captured string. */
if (!by_number)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
else
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->unset_backref)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }

#if defined SUPPORT_UNICODE
if (common->utf && *cc == OP_REFI)
  {
  /* The comparison is done by do_utf_caselesscmp(). Its four arguments are
  passed in R0 (TMP1, start of the capture), R1 (STR_PTR), R2 (end of the
  capture) and R3 (STR_END). */
  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
  if (by_number)
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  if (withchecks)
    empty = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_R2, 0);

  OP1(SLJIT_MOV, SLJIT_R3, 0, STR_END, 0);
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
  /* The returned value becomes the new subject position. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* Partial matching: a result below 1 is a failure, a result equal to 1
    goes through the partial match handling at STR_END, and any other
    result continues after the emitted code. */
    OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);

    add_jump(compiler, backtracks, JUMP(SLJIT_LESS));

    finished = JUMP(SLJIT_NOT_EQUAL);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(finished);
    }
  else
    {
    /* Complete matching: results 0 and 1 are both failures. */
    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
    }
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* The shared compare routine is reached with a fast call. */
  jump_list **compare_calls = (*cc == OP_REF) ? &common->casefulcmp : &common->caselesscmp;

  /* TMP2 = end of capture - start of capture; the zero flag is set for an
  empty capture. */
  if (by_number)
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  if (withchecks)
    empty = JUMP(SLJIT_ZERO);

  /* Advance STR_PTR by the whole length and test for running past STR_END.
  This is a plain failure in complete mode. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  overrun = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, overrun);

  /* Backtrack if the compare routine leaves a non-zero value in TMP2. */
  add_jump(compiler, compare_calls, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    struct sljit_jump *nothing_left;

    finished = JUMP(SLJIT_JUMP);
    JUMPHERE(overrun);
    /* TMP2 = TMP2 - STR_PTR + STR_END, that is the length reduced by the
    amount STR_PTR went past STR_END. */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    nothing_left = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, compare_calls, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(nothing_left);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(finished);
    }
  }

if (empty == NULL)
  return;

/* Resolve the empty capture jump according to emptyfail. */
if (emptyfail)
  add_jump(compiler, backtracks, empty);
else
  JUMPHERE(empty);
}
7935 
compile_ref_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)7936 static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
7937 {
7938 DEFINE_COMPILER;
7939 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
7940 backtrack_common *backtrack;
7941 PCRE2_UCHAR type;
7942 int offset = 0;
7943 struct sljit_label *label;
7944 struct sljit_jump *zerolength;
7945 struct sljit_jump *jump = NULL;
7946 PCRE2_SPTR ccbegin = cc;
7947 int min = 0, max = 0;
7948 BOOL minimize;
7949 
7950 PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
7951 
7952 if (ref)
7953   offset = GET2(cc, 1) << 1;
7954 else
7955   cc += IMM2_SIZE;
7956 type = cc[1 + IMM2_SIZE];
7957 
7958 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
7959 minimize = (type & 0x1) != 0;
7960 switch(type)
7961   {
7962   case OP_CRSTAR:
7963   case OP_CRMINSTAR:
7964   min = 0;
7965   max = 0;
7966   cc += 1 + IMM2_SIZE + 1;
7967   break;
7968   case OP_CRPLUS:
7969   case OP_CRMINPLUS:
7970   min = 1;
7971   max = 0;
7972   cc += 1 + IMM2_SIZE + 1;
7973   break;
7974   case OP_CRQUERY:
7975   case OP_CRMINQUERY:
7976   min = 0;
7977   max = 1;
7978   cc += 1 + IMM2_SIZE + 1;
7979   break;
7980   case OP_CRRANGE:
7981   case OP_CRMINRANGE:
7982   min = GET2(cc, 1 + IMM2_SIZE + 1);
7983   max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
7984   cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
7985   break;
7986   default:
7987   SLJIT_UNREACHABLE();
7988   break;
7989   }
7990 
7991 if (!minimize)
7992   {
7993   if (min == 0)
7994     {
7995     allocate_stack(common, 2);
7996     if (ref)
7997       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
7998     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7999     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
8000     /* Temporary release of STR_PTR. */
8001     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
8002     /* Handles both invalid and empty cases. Since the minimum repeat,
8003     is zero the invalid case is basically the same as an empty case. */
8004     if (ref)
8005       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
8006     else
8007       {
8008       compile_dnref_search(common, ccbegin, NULL);
8009       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
8010       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
8011       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
8012       }
8013     /* Restore if not zero length. */
8014     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
8015     }
8016   else
8017     {
8018     allocate_stack(common, 1);
8019     if (ref)
8020       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
8021     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
8022     if (ref)
8023       {
8024       add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
8025       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
8026       }
8027     else
8028       {
8029       compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
8030       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
8031       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
8032       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
8033       }
8034     }
8035 
8036   if (min > 1 || max > 1)
8037     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
8038 
8039   label = LABEL();
8040   if (!ref)
8041     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
8042   compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
8043 
8044   if (min > 1 || max > 1)
8045     {
8046     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
8047     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
8048     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
8049     if (min > 1)
8050       CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
8051     if (max > 1)
8052       {
8053       jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
8054       allocate_stack(common, 1);
8055       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8056       JUMPTO(SLJIT_JUMP, label);
8057       JUMPHERE(jump);
8058       }
8059     }
8060 
8061   if (max == 0)
8062     {
8063     /* Includes min > 1 case as well. */
8064     allocate_stack(common, 1);
8065     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8066     JUMPTO(SLJIT_JUMP, label);
8067     }
8068 
8069   JUMPHERE(zerolength);
8070   BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
8071 
8072   count_match(common);
8073   return cc;
8074   }
8075 
8076 allocate_stack(common, ref ? 2 : 3);
8077 if (ref)
8078   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
8079 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
8080 if (type != OP_CRMINSTAR)
8081   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
8082 
8083 if (min == 0)
8084   {
8085   /* Handles both invalid and empty cases. Since the minimum repeat,
8086   is zero the invalid case is basically the same as an empty case. */
8087   if (ref)
8088     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
8089   else
8090     {
8091     compile_dnref_search(common, ccbegin, NULL);
8092     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
8093     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
8094     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
8095     }
8096   /* Length is non-zero, we can match real repeats. */
8097   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8098   jump = JUMP(SLJIT_JUMP);
8099   }
8100 else
8101   {
8102   if (ref)
8103     {
8104     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
8105     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
8106     }
8107   else
8108     {
8109     compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
8110     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
8111     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
8112     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
8113     }
8114   }
8115 
8116 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
8117 if (max > 0)
8118   add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
8119 
8120 if (!ref)
8121   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
8122 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
8123 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8124 
8125 if (min > 1)
8126   {
8127   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8128   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
8129   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
8130   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
8131   }
8132 else if (max > 0)
8133   OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
8134 
8135 if (jump != NULL)
8136   JUMPHERE(jump);
8137 JUMPHERE(zerolength);
8138 
8139 count_match(common);
8140 return cc;
8141 }
8142 
compile_recurse_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)8143 static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
8144 {
8145 DEFINE_COMPILER;
8146 backtrack_common *backtrack;
8147 recurse_entry *entry = common->entries;
8148 recurse_entry *prev = NULL;
8149 sljit_sw start = GET(cc, 1);
8150 PCRE2_SPTR start_cc;
8151 BOOL needs_control_head;
8152 
8153 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
8154 
8155 /* Inlining simple patterns. */
8156 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
8157   {
8158   start_cc = common->start + start;
8159   compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
8160   BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
8161   return cc + 1 + LINK_SIZE;
8162   }
8163 
8164 while (entry != NULL)
8165   {
8166   if (entry->start == start)
8167     break;
8168   prev = entry;
8169   entry = entry->next;
8170   }
8171 
8172 if (entry == NULL)
8173   {
8174   entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
8175   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8176     return NULL;
8177   entry->next = NULL;
8178   entry->entry_label = NULL;
8179   entry->backtrack_label = NULL;
8180   entry->entry_calls = NULL;
8181   entry->backtrack_calls = NULL;
8182   entry->start = start;
8183 
8184   if (prev != NULL)
8185     prev->next = entry;
8186   else
8187     common->entries = entry;
8188   }
8189 
8190 BACKTRACK_AS(recurse_backtrack)->entry = entry;
8191 
8192 if (entry->entry_label == NULL)
8193   add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));
8194 else
8195   JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
8196 /* Leave if the match is failed. */
8197 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
8198 BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
8199 return cc + 1 + LINK_SIZE;
8200 }
8201 
do_callout(struct jit_arguments * arguments,pcre2_callout_block * callout_block,PCRE2_SPTR * jit_ovector)8202 static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
8203 {
8204 PCRE2_SPTR begin;
8205 PCRE2_SIZE *ovector;
8206 sljit_u32 oveccount, capture_top;
8207 
8208 if (arguments->callout == NULL)
8209   return 0;
8210 
8211 SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);
8212 
8213 begin = arguments->begin;
8214 ovector = (PCRE2_SIZE*)(callout_block + 1);
8215 oveccount = callout_block->capture_top;
8216 
8217 SLJIT_ASSERT(oveccount >= 1);
8218 
8219 callout_block->version = 2;
8220 callout_block->callout_flags = 0;
8221 
8222 /* Offsets in subject. */
8223 callout_block->subject_length = arguments->end - arguments->begin;
8224 callout_block->start_match = jit_ovector[0] - begin;
8225 callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
8226 callout_block->subject = begin;
8227 
8228 /* Convert and copy the JIT offset vector to the ovector array. */
8229 callout_block->capture_top = 1;
8230 callout_block->offset_vector = ovector;
8231 
8232 ovector[0] = PCRE2_UNSET;
8233 ovector[1] = PCRE2_UNSET;
8234 ovector += 2;
8235 jit_ovector += 2;
8236 capture_top = 1;
8237 
8238 /* Convert pointers to sizes. */
8239 while (--oveccount != 0)
8240   {
8241   capture_top++;
8242 
8243   ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin);
8244   ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin);
8245 
8246   if (ovector[0] != PCRE2_UNSET)
8247     callout_block->capture_top = capture_top;
8248 
8249   ovector += 2;
8250   jit_ovector += 2;
8251   }
8252 
8253 return (arguments->callout)(callout_block, arguments->callout_data);
8254 }
8255 
/* Offset of the member "arg" within pcre2_callout_block, as computed by
SLJIT_OFFSETOF. */
#define CALLOUT_ARG_OFFSET(arg) SLJIT_OFFSETOF(pcre2_callout_block, arg)
8258 
compile_callout_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)8259 static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
8260 {
8261 DEFINE_COMPILER;
8262 backtrack_common *backtrack;
8263 sljit_s32 mov_opcode;
8264 unsigned int callout_length = (*cc == OP_CALLOUT)
8265     ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
8266 sljit_sw value1;
8267 sljit_sw value2;
8268 sljit_sw value3;
8269 sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw);
8270 
8271 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
8272 
8273 callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);
8274 
8275 allocate_stack(common, callout_arg_size);
8276 
8277 SLJIT_ASSERT(common->capture_last_ptr != 0);
8278 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
8279 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8280 value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
8281 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
8282 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
8283 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
8284 
8285 /* These pointer sized fields temporarly stores internal variables. */
8286 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
8287 
8288 if (common->mark_ptr != 0)
8289   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
8290 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
8291 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
8292 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
8293 
8294 if (*cc == OP_CALLOUT)
8295   {
8296   value1 = 0;
8297   value2 = 0;
8298   value3 = 0;
8299   }
8300 else
8301   {
8302   value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
8303   value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
8304   value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
8305   }
8306 
8307 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
8308 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
8309 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
8310 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
8311 
8312 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
8313 
8314 /* Needed to save important temporary registers. */
8315 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
8316 /* SLJIT_R0 = arguments */
8317 OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
8318 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
8319 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
8320 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
8321 free_stack(common, callout_arg_size);
8322 
8323 /* Check return value. */
8324 OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
8325 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER32));
8326 if (common->abort_label == NULL)
8327   add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL32) /* SIG_LESS */);
8328 else
8329   JUMPTO(SLJIT_NOT_EQUAL32 /* SIG_LESS */, common->abort_label);
8330 return cc + callout_length;
8331 }
8332 
/* End of the scope of the callout helper macros.
   NOTE(review): no #define of CALLOUT_ARG_SIZE is visible in this part of
   the file - confirm whether this #undef is still needed. */
8333 #undef CALLOUT_ARG_SIZE
8334 #undef CALLOUT_ARG_OFFSET
8335 
assert_needs_str_ptr_saving(PCRE2_SPTR cc)8336 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
8337 {
8338 while (TRUE)
8339   {
8340   switch (*cc)
8341     {
8342     case OP_CALLOUT_STR:
8343     cc += GET(cc, 1 + 2*LINK_SIZE);
8344     break;
8345 
8346     case OP_NOT_WORD_BOUNDARY:
8347     case OP_WORD_BOUNDARY:
8348     case OP_CIRC:
8349     case OP_CIRCM:
8350     case OP_DOLL:
8351     case OP_DOLLM:
8352     case OP_CALLOUT:
8353     case OP_ALT:
8354     cc += PRIV(OP_lengths)[*cc];
8355     break;
8356 
8357     case OP_KET:
8358     return FALSE;
8359 
8360     default:
8361     return TRUE;
8362     }
8363   }
8364 }
8365 
/* Generates the matching path of an assertion whose opcode lies in the range
   OP_ASSERT .. OP_ASSERTBACK_NOT (asserted below), optionally preceded by
   OP_BRAZERO or OP_BRAMINZERO.

   Arguments:
     common       the compiler state; its accept, quit, then_trap and
                  positive-assertion fields are saved on entry and restored
                  before every return
     cc           points at the assertion opcode, or at the OP_BRAZERO /
                  OP_BRAMINZERO in front of it
     backtrack    receives framesize and private_data_ptr; its jump lists
                  collect the exits of the generated code
     conditional  when TRUE, failure jumps are collected in
                  backtrack->condfailed instead of
                  backtrack->common.topbacktracks; a zero-repeat prefix is
                  not allowed in this mode (asserted)

   Returns:       pointer just past the opcode that terminates the last
                  alternative (cc + 1 + LINK_SIZE), or NULL when sljit
                  reports a compiler error after an alternative was compiled
*/
compile_assert_matchingpath(compiler_common * common,PCRE2_SPTR cc,assert_backtrack * backtrack,BOOL conditional)8366 static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
8367 {
8368 DEFINE_COMPILER;
8369 int framesize;
8370 int extrasize;
8371 BOOL local_quit_available = FALSE;
8372 BOOL needs_control_head;
8373 int private_data_ptr;
8374 backtrack_common altbacktrack;
8375 PCRE2_SPTR ccbegin;
8376 PCRE2_UCHAR opcode;
8377 PCRE2_UCHAR bra = OP_BRA;
8378 jump_list *tmp = NULL;
8379 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
8380 jump_list **found;
8381 /* Saving previous accept variables. */
8382 BOOL save_local_quit_available = common->local_quit_available;
8383 BOOL save_in_positive_assertion = common->in_positive_assertion;
8384 then_trap_backtrack *save_then_trap = common->then_trap;
8385 struct sljit_label *save_quit_label = common->quit_label;
8386 struct sljit_label *save_accept_label = common->accept_label;
8387 jump_list *save_quit = common->quit;
8388 jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
8389 jump_list *save_accept = common->accept;
8390 struct sljit_jump *jump;
8391 struct sljit_jump *brajump = NULL;
8392 
8393 /* Assert captures then. */
8394 common->then_trap = NULL;
8395 
     /* Remember a leading OP_BRAZERO / OP_BRAMINZERO in 'bra' and step over it. */
8396 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
8397   {
8398   SLJIT_ASSERT(!conditional);
8399   bra = *cc;
8400   cc++;
8401   }
8402 private_data_ptr = PRIVATE_DATA(cc);
8403 SLJIT_ASSERT(private_data_ptr != 0);
8404 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
8405 backtrack->framesize = framesize;
8406 backtrack->private_data_ptr = private_data_ptr;
8407 opcode = *cc;
8408 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
     /* 'found' is where the jump taken after a matched alternative is queued:
        positive assertions use the local list 'tmp' (bound at the "Assert is
        successful" label below), negative ones jump directly to 'target'. */
8409 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
     /* ccbegin marks the start of the current alternative, cc its end. */
8410 ccbegin = cc;
8411 cc += GET(cc, 1);
8412 
8413 if (bra == OP_BRAMINZERO)
8414   {
8415   /* This is a braminzero backtrack path. */
8416   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8417   free_stack(common, 1);
8418   brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
8419   }
8420 
8421 if (framesize < 0)
8422   {
       /* No frame is built. extrasize counts the words pushed here: one for
          STR_PTR (dropped when bra is OP_BRA and
          assert_needs_str_ptr_saving() returns FALSE) and one more for the
          control head when needs_control_head is set. */
8423   extrasize = 1;
8424   if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
8425     extrasize = 0;
8426 
8427   if (needs_control_head)
8428     extrasize++;
8429 
8430   if (framesize == no_frame)
8431     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
8432 
8433   if (extrasize > 0)
8434     allocate_stack(common, extrasize);
8435 
8436   if (needs_control_head)
8437     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8438 
8439   if (extrasize > 0)
8440     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8441 
8442   if (needs_control_head)
8443     {
8444     SLJIT_ASSERT(extrasize == 2);
8445     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
8446     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
8447     }
8448   }
8449 else
8450   {
       /* A frame is needed. Besides the frame itself the stack receives
          STR_PTR at STACK(0), the previous private_data_ptr value, and the
          control head when needs_control_head is set. private_data_ptr is
          updated to STACK_TOP + (framesize + extrasize) words. */
8451   extrasize = needs_control_head ? 3 : 2;
8452   allocate_stack(common, framesize + extrasize);
8453 
8454   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8455   OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
8456   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
8457   if (needs_control_head)
8458     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8459   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8460 
8461   if (needs_control_head)
8462     {
8463     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
8464     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
8465     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
8466     }
8467   else
8468     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
8469 
8470   init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
8471   }
8472 
8473 memset(&altbacktrack, 0, sizeof(backtrack_common));
8474 if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
8475   {
8476   /* Control verbs cannot escape from these asserts. */
8477   local_quit_available = TRUE;
8478   common->local_quit_available = TRUE;
8479   common->quit_label = NULL;
8480   common->quit = NULL;
8481   }
8482 
8483 common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
8484 common->positive_assertion_quit = NULL;
8485 
     /* One iteration per alternative of the assertion. */
8486 while (1)
8487   {
8488   common->accept_label = NULL;
8489   common->accept = NULL;
8490   altbacktrack.top = NULL;
8491   altbacktrack.topbacktracks = NULL;
8492 
       /* Alternatives after the first one reload STR_PTR from STACK(0). */
8493   if (*ccbegin == OP_ALT && extrasize > 0)
8494     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8495 
8496   altbacktrack.cc = ccbegin;
8497   compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
8498   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8499     {
         /* Compiler error: restore the saved state and give up. */
8500     if (local_quit_available)
8501       {
8502       common->local_quit_available = save_local_quit_available;
8503       common->quit_label = save_quit_label;
8504       common->quit = save_quit;
8505       }
8506     common->in_positive_assertion = save_in_positive_assertion;
8507     common->then_trap = save_then_trap;
8508     common->accept_label = save_accept_label;
8509     common->positive_assertion_quit = save_positive_assertion_quit;
8510     common->accept = save_accept;
8511     return NULL;
8512     }
       /* Jumps queued in common->accept while compiling this alternative
          are bound to the label placed right after it. */
8513   common->accept_label = LABEL();
8514   if (common->accept != NULL)
8515     set_jumps(common->accept, common->accept_label);
8516 
8517   /* Reset stack. */
8518   if (framesize < 0)
8519     {
8520     if (framesize == no_frame)
8521       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8522     else if (extrasize > 0)
8523       free_stack(common, extrasize);
8524 
8525     if (needs_control_head)
8526       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
8527     }
8528   else
8529     {
8530     if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
8531       {
8532       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
8533       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
8534       if (needs_control_head)
8535         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
8536       }
8537     else
8538       {
8539       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8540       if (needs_control_head)
8541         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
8542       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
8543       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
8544       }
8545     }
8546 
8547   if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
8548     {
8549     /* We know that STR_PTR was stored on the top of the stack. */
8550     if (conditional)
8551       {
8552       if (extrasize > 0)
8553         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
8554       }
8555     else if (bra == OP_BRAZERO)
8556       {
8557       if (framesize < 0)
8558         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
8559       else
8560         {
8561         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
8562         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
8563         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
8564         }
8565       OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
8566       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
8567       }
8568     else if (framesize >= 0)
8569       {
8570       /* For OP_BRA and OP_BRAMINZERO. */
8571       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
8572       }
8573     }
       /* This alternative matched: leave through the 'found' list. */
8574   add_jump(compiler, found, JUMP(SLJIT_JUMP));
8575 
       /* Backtracking code of this alternative; its failure exits are bound
          to the label that follows, i.e. the next alternative or the code
          after the loop. */
8576   compile_backtrackingpath(common, altbacktrack.top);
8577   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8578     {
         /* Compiler error: restore the saved state and give up. */
8579     if (local_quit_available)
8580       {
8581       common->local_quit_available = save_local_quit_available;
8582       common->quit_label = save_quit_label;
8583       common->quit = save_quit;
8584       }
8585     common->in_positive_assertion = save_in_positive_assertion;
8586     common->then_trap = save_then_trap;
8587     common->accept_label = save_accept_label;
8588     common->positive_assertion_quit = save_positive_assertion_quit;
8589     common->accept = save_accept;
8590     return NULL;
8591     }
8592   set_jumps(altbacktrack.topbacktracks, LABEL());
8593 
8594   if (*cc != OP_ALT)
8595     break;
8596 
8597   ccbegin = cc;
8598   cc += GET(cc, 1);
8599   }
8600 
8601 if (local_quit_available)
8602   {
8603   SLJIT_ASSERT(common->positive_assertion_quit == NULL);
8604   /* Makes the check less complicated below. */
8605   common->positive_assertion_quit = common->quit;
8606   }
8607 
8608 /* None of them matched. */
8609 if (common->positive_assertion_quit != NULL)
8610   {
       /* The normal fall-through path jumps over this block; only the queued
          quit jumps land here, where STACK_TOP is recomputed from
          private_data_ptr. */
8611   jump = JUMP(SLJIT_JUMP);
8612   set_jumps(common->positive_assertion_quit, LABEL());
8613   SLJIT_ASSERT(framesize != no_stack);
8614   if (framesize < 0)
8615     OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
8616   else
8617     {
8618     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8619     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
8620     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
8621     }
8622   JUMPHERE(jump);
8623   }
8624 
8625 if (needs_control_head)
8626   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
8627 
8628 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
8629   {
8630   /* Assert is failed. */
8631   if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
8632     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8633 
8634   if (framesize < 0)
8635     {
8636     /* The topmost item should be 0. */
8637     if (bra == OP_BRAZERO)
8638       {
8639       if (extrasize == 2)
8640         free_stack(common, 1);
8641       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
8642       }
8643     else if (extrasize > 0)
8644       free_stack(common, extrasize);
8645     }
8646   else
8647     {
8648     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
8649     /* The topmost item should be 0. */
8650     if (bra == OP_BRAZERO)
8651       {
8652       free_stack(common, framesize + extrasize - 1);
8653       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
8654       }
8655     else
8656       free_stack(common, framesize + extrasize);
8657     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
8658     }
       /* With OP_BRAZERO the failure jump is bound to matchingpath below;
          otherwise it is queued on 'target'. */
8659   jump = JUMP(SLJIT_JUMP);
8660   if (bra != OP_BRAZERO)
8661     add_jump(compiler, target, jump);
8662 
8663   /* Assert is successful. */
8664   set_jumps(tmp, LABEL());
8665   if (framesize < 0)
8666     {
8667     /* We know that STR_PTR was stored on the top of the stack. */
8668     if (extrasize > 0)
8669       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
8670 
8671     /* Keep the STR_PTR on the top of the stack. */
8672     if (bra == OP_BRAZERO)
8673       {
8674       OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
8675       if (extrasize == 2)
8676         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8677       }
8678     else if (bra == OP_BRAMINZERO)
8679       {
8680       OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
8681       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
8682       }
8683     }
8684   else
8685     {
8686     if (bra == OP_BRA)
8687       {
8688       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
8689       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
8690       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
8691       }
8692     else
8693       {
8694       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
8695       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
8696       if (extrasize == 2)
8697         {
8698         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8699         if (bra == OP_BRAMINZERO)
8700           OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
8701         }
8702       else
8703         {
8704         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
8705         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
8706         }
8707       }
8708     }
8709 
8710   if (bra == OP_BRAZERO)
8711     {
8712     backtrack->matchingpath = LABEL();
8713     SET_LABEL(jump, backtrack->matchingpath);
8714     }
8715   else if (bra == OP_BRAMINZERO)
8716     {
8717     JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
8718     JUMPHERE(brajump);
8719     if (framesize >= 0)
8720       {
8721       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8722       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
8723       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
8724       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
8725       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
8726       }
8727     set_jumps(backtrack->common.topbacktracks, LABEL());
8728     }
8729   }
8730 else
8731   {
8732   /* AssertNot is successful. */
8733   if (framesize < 0)
8734     {
8735     if (extrasize > 0)
8736       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8737 
8738     if (bra != OP_BRA)
8739       {
8740       if (extrasize == 2)
8741         free_stack(common, 1);
8742       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
8743       }
8744     else if (extrasize > 0)
8745       free_stack(common, extrasize);
8746     }
8747   else
8748     {
8749     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8750     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
8751     /* The topmost item should be 0. */
8752     if (bra != OP_BRA)
8753       {
8754       free_stack(common, framesize + extrasize - 1);
8755       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
8756       }
8757     else
8758       free_stack(common, framesize + extrasize);
8759     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
8760     }
8761 
8762   if (bra == OP_BRAZERO)
8763     backtrack->matchingpath = LABEL();
8764   else if (bra == OP_BRAMINZERO)
8765     {
8766     JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
8767     JUMPHERE(brajump);
8768     }
8769 
8770   if (bra != OP_BRA)
8771     {
8772     SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
8773     set_jumps(backtrack->common.topbacktracks, LABEL());
8774     backtrack->common.topbacktracks = NULL;
8775     }
8776   }
8777 
     /* Put back the state saved on entry. */
8778 if (local_quit_available)
8779   {
8780   common->local_quit_available = save_local_quit_available;
8781   common->quit_label = save_quit_label;
8782   common->quit = save_quit;
8783   }
8784 common->in_positive_assertion = save_in_positive_assertion;
8785 common->then_trap = save_then_trap;
8786 common->accept_label = save_accept_label;
8787 common->positive_assertion_quit = save_positive_assertion_quit;
8788 common->accept = save_accept;
8789 return cc + 1 + LINK_SIZE;
8790 }
8791 
match_once_common(compiler_common * common,PCRE2_UCHAR ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)8792 static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
8793 {
8794 DEFINE_COMPILER;
8795 int stacksize;
8796 
8797 if (framesize < 0)
8798   {
8799   if (framesize == no_frame)
8800     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8801   else
8802     {
8803     stacksize = needs_control_head ? 1 : 0;
8804     if (ket != OP_KET || has_alternatives)
8805       stacksize++;
8806 
8807     if (stacksize > 0)
8808       free_stack(common, stacksize);
8809     }
8810 
8811   if (needs_control_head)
8812     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
8813 
8814   /* TMP2 which is set here used by OP_KETRMAX below. */
8815   if (ket == OP_KETRMAX)
8816     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
8817   else if (ket == OP_KETRMIN)
8818     {
8819     /* Move the STR_PTR to the private_data_ptr. */
8820     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
8821     }
8822   }
8823 else
8824   {
8825   stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
8826   OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
8827   if (needs_control_head)
8828     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
8829 
8830   if (ket == OP_KETRMAX)
8831     {
8832     /* TMP2 which is set here used by OP_KETRMAX below. */
8833     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8834     }
8835   }
8836 if (needs_control_head)
8837   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
8838 }
8839 
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)8840 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
8841 {
8842 DEFINE_COMPILER;
8843 
8844 if (common->capture_last_ptr != 0)
8845   {
8846   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
8847   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
8848   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
8849   stacksize++;
8850   }
8851 if (common->optimized_cbracket[offset >> 1] == 0)
8852   {
8853   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
8854   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
8855   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
8856   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8857   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
8858   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
8859   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
8860   stacksize += 2;
8861   }
8862 return stacksize;
8863 }
8864 
8865 /*
8866   Handling bracketed expressions is probably the most complex part.
8867 
8868   Stack layout naming characters:
8869     S - Push the current STR_PTR
8870     0 - Push a 0 (NULL)
8871     A - Push the current STR_PTR. Needed for restoring the STR_PTR
8872         before the next alternative. Not pushed if there are no alternatives.
8873     M - Any values pushed by the current alternative. Can be empty, or anything.
8874     C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
8875     L - Push the previous local (pointed by localptr) to the stack
8876    () - optional values stored on the stack
8877   ()* - optional, can be stored multiple times
8878 
8879   The following list shows the regular expression templates, their PCRE byte codes
8880   and stack layout supported by pcre-sljit.
8881 
8882   (?:)                     OP_BRA     | OP_KET                A M
8883   ()                       OP_CBRA    | OP_KET                C M
8884   (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
8885                            OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
8886   (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
8887                            OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
8888   ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
8889                            OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
8890   ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
8891                            OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
8892   (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
8893   (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
8894   ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
8895   ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
8896   (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
8897            OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
8898   (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
8899            OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
8900   ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
8901            OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
8902   ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
8903            OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*
8904 
8905 
8906   Stack layout naming characters:
8907     A - Push the alternative index (starting from 0) on the stack.
8908         Not pushed if there are no alternatives.
8909     M - Any values pushed by the current alternative. Can be empty, or anything.
8910 
8911   The next list shows the possible content of a bracket:
8912   (|)     OP_*BRA    | OP_ALT ...         M A
8913   (?()|)  OP_*COND   | OP_ALT             M A
8914   (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
8915                                           Or nothing, if trace is unnecessary
8916 */
8917 
compile_bracket_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)8918 static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
8919 {
8920 DEFINE_COMPILER;
8921 backtrack_common *backtrack;
8922 PCRE2_UCHAR opcode;
8923 int private_data_ptr = 0;
8924 int offset = 0;
8925 int i, stacksize;
8926 int repeat_ptr = 0, repeat_length = 0;
8927 int repeat_type = 0, repeat_count = 0;
8928 PCRE2_SPTR ccbegin;
8929 PCRE2_SPTR matchingpath;
8930 PCRE2_SPTR slot;
8931 PCRE2_UCHAR bra = OP_BRA;
8932 PCRE2_UCHAR ket;
8933 assert_backtrack *assert;
8934 BOOL has_alternatives;
8935 BOOL needs_control_head = FALSE;
8936 struct sljit_jump *jump;
8937 struct sljit_jump *skip;
8938 struct sljit_label *rmax_label = NULL;
8939 struct sljit_jump *braminzero = NULL;
8940 
8941 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
8942 
8943 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
8944   {
8945   bra = *cc;
8946   cc++;
8947   opcode = *cc;
8948   }
8949 
8950 opcode = *cc;
8951 ccbegin = cc;
8952 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
8953 ket = *matchingpath;
8954 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
8955   {
8956   repeat_ptr = PRIVATE_DATA(matchingpath);
8957   repeat_length = PRIVATE_DATA(matchingpath + 1);
8958   repeat_type = PRIVATE_DATA(matchingpath + 2);
8959   repeat_count = PRIVATE_DATA(matchingpath + 3);
8960   SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
8961   if (repeat_type == OP_UPTO)
8962     ket = OP_KETRMAX;
8963   if (repeat_type == OP_MINUPTO)
8964     ket = OP_KETRMIN;
8965   }
8966 
8967 matchingpath = ccbegin + 1 + LINK_SIZE;
8968 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
8969 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
8970 cc += GET(cc, 1);
8971 
8972 has_alternatives = *cc == OP_ALT;
8973 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
8974   {
8975   SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
8976     compile_time_checks_must_be_grouped_together);
8977   has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
8978   }
8979 
8980 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
8981   opcode = OP_SCOND;
8982 
8983 if (opcode == OP_CBRA || opcode == OP_SCBRA)
8984   {
8985   /* Capturing brackets has a pre-allocated space. */
8986   offset = GET2(ccbegin, 1 + LINK_SIZE);
8987   if (common->optimized_cbracket[offset] == 0)
8988     {
8989     private_data_ptr = OVECTOR_PRIV(offset);
8990     offset <<= 1;
8991     }
8992   else
8993     {
8994     offset <<= 1;
8995     private_data_ptr = OVECTOR(offset);
8996     }
8997   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
8998   matchingpath += IMM2_SIZE;
8999   }
9000 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
9001   {
9002   /* Other brackets simply allocate the next entry. */
9003   private_data_ptr = PRIVATE_DATA(ccbegin);
9004   SLJIT_ASSERT(private_data_ptr != 0);
9005   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
9006   if (opcode == OP_ONCE)
9007     BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
9008   }
9009 
9010 /* Instructions before the first alternative. */
9011 stacksize = 0;
9012 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
9013   stacksize++;
9014 if (bra == OP_BRAZERO)
9015   stacksize++;
9016 
9017 if (stacksize > 0)
9018   allocate_stack(common, stacksize);
9019 
9020 stacksize = 0;
9021 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
9022   {
9023   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
9024   stacksize++;
9025   }
9026 
9027 if (bra == OP_BRAZERO)
9028   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9029 
9030 if (bra == OP_BRAMINZERO)
9031   {
9032   /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
9033   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9034   if (ket != OP_KETRMIN)
9035     {
9036     free_stack(common, 1);
9037     braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9038     }
9039   else
9040     {
9041     if (opcode == OP_ONCE || opcode >= OP_SBRA)
9042       {
9043       jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9044       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9045       /* Nothing stored during the first run. */
9046       skip = JUMP(SLJIT_JUMP);
9047       JUMPHERE(jump);
9048       /* Checking zero-length iteration. */
9049       if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
9050         {
9051         /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
9052         braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9053         }
9054       else
9055         {
9056         /* Except when the whole stack frame must be saved. */
9057         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9058         braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
9059         }
9060       JUMPHERE(skip);
9061       }
9062     else
9063       {
9064       jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9065       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9066       JUMPHERE(jump);
9067       }
9068     }
9069   }
9070 
9071 if (repeat_type != 0)
9072   {
9073   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
9074   if (repeat_type == OP_EXACT)
9075     rmax_label = LABEL();
9076   }
9077 
9078 if (ket == OP_KETRMIN)
9079   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
9080 
9081 if (ket == OP_KETRMAX)
9082   {
9083   rmax_label = LABEL();
9084   if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
9085     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
9086   }
9087 
9088 /* Handling capturing brackets and alternatives. */
9089 if (opcode == OP_ONCE)
9090   {
9091   stacksize = 0;
9092   if (needs_control_head)
9093     {
9094     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9095     stacksize++;
9096     }
9097 
9098   if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
9099     {
9100     /* Neither capturing brackets nor recursions are found in the block. */
9101     if (ket == OP_KETRMIN)
9102       {
9103       stacksize += 2;
9104       if (!needs_control_head)
9105         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9106       }
9107     else
9108       {
9109       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
9110         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
9111       if (ket == OP_KETRMAX || has_alternatives)
9112         stacksize++;
9113       }
9114 
9115     if (stacksize > 0)
9116       allocate_stack(common, stacksize);
9117 
9118     stacksize = 0;
9119     if (needs_control_head)
9120       {
9121       stacksize++;
9122       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9123       }
9124 
9125     if (ket == OP_KETRMIN)
9126       {
9127       if (needs_control_head)
9128         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9129       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9130       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
9131         OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
9132       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
9133       }
9134     else if (ket == OP_KETRMAX || has_alternatives)
9135       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9136     }
9137   else
9138     {
9139     if (ket != OP_KET || has_alternatives)
9140       stacksize++;
9141 
9142     stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
9143     allocate_stack(common, stacksize);
9144 
9145     if (needs_control_head)
9146       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9147 
9148     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9149     OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
9150 
9151     stacksize = needs_control_head ? 1 : 0;
9152     if (ket != OP_KET || has_alternatives)
9153       {
9154       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9155       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
9156       stacksize++;
9157       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
9158       }
9159     else
9160       {
9161       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
9162       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
9163       }
9164     init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
9165     }
9166   }
9167 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
9168   {
9169   /* Saving the previous values. */
9170   if (common->optimized_cbracket[offset >> 1] != 0)
9171     {
9172     SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
9173     allocate_stack(common, 2);
9174     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9175     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
9176     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9177     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
9178     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
9179     }
9180   else
9181     {
9182     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9183     allocate_stack(common, 1);
9184     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9185     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9186     }
9187   }
9188 else if (opcode == OP_SBRA || opcode == OP_SCOND)
9189   {
9190   /* Saving the previous value. */
9191   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9192   allocate_stack(common, 1);
9193   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9194   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9195   }
9196 else if (has_alternatives)
9197   {
9198   /* Pushing the starting string pointer. */
9199   allocate_stack(common, 1);
9200   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9201   }
9202 
9203 /* Generating code for the first alternative. */
9204 if (opcode == OP_COND || opcode == OP_SCOND)
9205   {
9206   if (*matchingpath == OP_CREF)
9207     {
9208     SLJIT_ASSERT(has_alternatives);
9209     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
9210       CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9211     matchingpath += 1 + IMM2_SIZE;
9212     }
9213   else if (*matchingpath == OP_DNCREF)
9214     {
9215     SLJIT_ASSERT(has_alternatives);
9216 
9217     i = GET2(matchingpath, 1 + IMM2_SIZE);
9218     slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
9219     OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
9220     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
9221     OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
9222     slot += common->name_entry_size;
9223     i--;
9224     while (i-- > 0)
9225       {
9226       OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
9227       OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
9228       slot += common->name_entry_size;
9229       }
9230     OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
9231     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
9232     matchingpath += 1 + 2 * IMM2_SIZE;
9233     }
9234   else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
9235     {
9236     /* Never has other case. */
9237     BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
9238     SLJIT_ASSERT(!has_alternatives);
9239 
9240     if (*matchingpath == OP_TRUE)
9241       {
9242       stacksize = 1;
9243       matchingpath++;
9244       }
9245     else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
9246       stacksize = 0;
9247     else if (*matchingpath == OP_RREF)
9248       {
9249       stacksize = GET2(matchingpath, 1);
9250       if (common->currententry == NULL)
9251         stacksize = 0;
9252       else if (stacksize == RREF_ANY)
9253         stacksize = 1;
9254       else if (common->currententry->start == 0)
9255         stacksize = stacksize == 0;
9256       else
9257         stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
9258 
9259       if (stacksize != 0)
9260         matchingpath += 1 + IMM2_SIZE;
9261       }
9262     else
9263       {
9264       if (common->currententry == NULL || common->currententry->start == 0)
9265         stacksize = 0;
9266       else
9267         {
9268         stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
9269         slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
9270         i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
9271         while (stacksize > 0)
9272           {
9273           if ((int)GET2(slot, 0) == i)
9274             break;
9275           slot += common->name_entry_size;
9276           stacksize--;
9277           }
9278         }
9279 
9280       if (stacksize != 0)
9281         matchingpath += 1 + 2 * IMM2_SIZE;
9282       }
9283 
9284       /* The stacksize == 0 is a common "else" case. */
9285       if (stacksize == 0)
9286         {
9287         if (*cc == OP_ALT)
9288           {
9289           matchingpath = cc + 1 + LINK_SIZE;
9290           cc += GET(cc, 1);
9291           }
9292         else
9293           matchingpath = cc;
9294         }
9295     }
9296   else
9297     {
9298     SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
9299     /* Similar code as PUSH_BACKTRACK macro. */
9300     assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
9301     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9302       return NULL;
9303     memset(assert, 0, sizeof(assert_backtrack));
9304     assert->common.cc = matchingpath;
9305     BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
9306     matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
9307     }
9308   }
9309 
9310 compile_matchingpath(common, matchingpath, cc, backtrack);
9311 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9312   return NULL;
9313 
9314 if (opcode == OP_ONCE)
9315   match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
9316 
9317 stacksize = 0;
9318 if (repeat_type == OP_MINUPTO)
9319   {
9320   /* We need to preserve the counter. TMP2 will be used below. */
9321   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
9322   stacksize++;
9323   }
9324 if (ket != OP_KET || bra != OP_BRA)
9325   stacksize++;
9326 if (offset != 0)
9327   {
9328   if (common->capture_last_ptr != 0)
9329     stacksize++;
9330   if (common->optimized_cbracket[offset >> 1] == 0)
9331     stacksize += 2;
9332   }
9333 if (has_alternatives && opcode != OP_ONCE)
9334   stacksize++;
9335 
9336 if (stacksize > 0)
9337   allocate_stack(common, stacksize);
9338 
9339 stacksize = 0;
9340 if (repeat_type == OP_MINUPTO)
9341   {
9342   /* TMP2 was set above. */
9343   OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
9344   stacksize++;
9345   }
9346 
9347 if (ket != OP_KET || bra != OP_BRA)
9348   {
9349   if (ket != OP_KET)
9350     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9351   else
9352     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
9353   stacksize++;
9354   }
9355 
9356 if (offset != 0)
9357   stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
9358 
9359 if (has_alternatives)
9360   {
9361   if (opcode != OP_ONCE)
9362     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
9363   if (ket != OP_KETRMAX)
9364     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
9365   }
9366 
9367 /* Must be after the matchingpath label. */
9368 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
9369   {
9370   SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
9371   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
9372   }
9373 
9374 if (ket == OP_KETRMAX)
9375   {
9376   if (repeat_type != 0)
9377     {
9378     if (has_alternatives)
9379       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
9380     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
9381     JUMPTO(SLJIT_NOT_ZERO, rmax_label);
9382     /* Drop STR_PTR for greedy plus quantifier. */
9383     if (opcode != OP_ONCE)
9384       free_stack(common, 1);
9385     }
9386   else if (opcode == OP_ONCE || opcode >= OP_SBRA)
9387     {
9388     if (has_alternatives)
9389       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
9390     /* Checking zero-length iteration. */
9391     if (opcode != OP_ONCE)
9392       {
9393       CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
9394       /* Drop STR_PTR for greedy plus quantifier. */
9395       if (bra != OP_BRAZERO)
9396         free_stack(common, 1);
9397       }
9398     else
9399       /* TMP2 must contain the starting STR_PTR. */
9400       CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
9401     }
9402   else
9403     JUMPTO(SLJIT_JUMP, rmax_label);
9404   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
9405   }
9406 
9407 if (repeat_type == OP_EXACT)
9408   {
9409   count_match(common);
9410   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
9411   JUMPTO(SLJIT_NOT_ZERO, rmax_label);
9412   }
9413 else if (repeat_type == OP_UPTO)
9414   {
9415   /* We need to preserve the counter. */
9416   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
9417   allocate_stack(common, 1);
9418   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9419   }
9420 
9421 if (bra == OP_BRAZERO)
9422   BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
9423 
9424 if (bra == OP_BRAMINZERO)
9425   {
9426   /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
9427   JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
9428   if (braminzero != NULL)
9429     {
9430     JUMPHERE(braminzero);
9431     /* We need to release the end pointer to perform the
9432     backtrack for the zero-length iteration. When
9433     framesize is < 0, OP_ONCE will do the release itself. */
9434     if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
9435       {
9436       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9437       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9438       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
9439       }
9440     else if (ket == OP_KETRMIN && opcode != OP_ONCE)
9441       free_stack(common, 1);
9442     }
9443   /* Continue to the normal backtrack. */
9444   }
9445 
9446 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
9447   count_match(common);
9448 
9449 /* Skip the other alternatives. */
9450 while (*cc == OP_ALT)
9451   cc += GET(cc, 1);
9452 cc += 1 + LINK_SIZE;
9453 
9454 if (opcode == OP_ONCE)
9455   {
9456   /* We temporarily encode the needs_control_head in the lowest bit.
9457      Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
9458      the same value for small signed numbers (including negative numbers). */
9459   BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
9460   }
9461 return cc + repeat_length;
9462 }
9463 
/* Generates the matching path for a possessively repeated bracket.

Accepts OP_BRAPOS, OP_SBRAPOS, OP_CBRAPOS and OP_SCBRAPOS, optionally preceded
by OP_BRAPOSZERO. Every alternative of the group is compiled inside a loop:
after a successful alternative the code jumps back to the loop label, and the
backtracking path of each alternative is emitted right after its matching
path, so a failed iteration falls through to the next alternative and finally
to the code after the group.

Arguments:
  common    the compiler state
  cc        points to the opening opcode (or to the OP_BRAPOSZERO before it)
  parent    the parent backtrack; not referenced directly here
            (NOTE(review): presumably consumed by PUSH_BACKTRACK, which also
            appears to set the local 'backtrack' - confirm against the macro)

Returns:    pointer to the code after the closing OP_KETRPOS, or NULL when
            the sljit compiler reports an error
*/
static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode;
int private_data_ptr;
int cbraprivptr = 0;
BOOL needs_control_head;
int framesize;
int stacksize;
int offset = 0;
BOOL zero = FALSE;
PCRE2_SPTR ccbegin = NULL;
int stack; /* Also contains the offset of control head. */
struct sljit_label *loop = NULL;
struct jump_list *emptymatch = NULL;

PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
if (*cc == OP_BRAPOSZERO)
  {
  /* Zero iterations are acceptable, no "matched at least once" flag is needed. */
  zero = TRUE;
  cc++;
  }

opcode = *cc;
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
switch(opcode)
  {
  case OP_BRAPOS:
  case OP_SBRAPOS:
  ccbegin = cc + 1 + LINK_SIZE;
  break;

  case OP_CBRAPOS:
  case OP_SCBRAPOS:
  offset = GET2(cc, 1 + LINK_SIZE);
  /* This case cannot be optimized in the same way as
  normal capturing brackets. */
  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
  cbraprivptr = OVECTOR_PRIV(offset);
  /* From here on 'offset' is the ovector index of the capture start. */
  offset <<= 1;
  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
if (framesize < 0)
  {
  /* No frame is saved. Slots pushed below, from STACK(0) upwards:
       capturing:     OVECTOR(offset), OVECTOR(offset + 1), [capture_last]
       non-capturing: STR_PTR
     followed by [control head] and, unless 'zero' is set, a flag which is
     1 until the first successful iteration clears it. */
  if (offset != 0)
    {
    stacksize = 2;
    if (common->capture_last_ptr != 0)
      stacksize++;
    }
  else
    stacksize = 1;

  if (needs_control_head)
    stacksize++;
  if (!zero)
    stacksize++;

  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  allocate_stack(common, stacksize);
  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  stack = 0;
  if (offset != 0)
    {
    stack = 2;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    if (common->capture_last_ptr != 0)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
      stack = 3;
      }
    }
  else
    {
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    stack = 1;
    }

  /* The flag goes into the slot after the control head; 'stack' is then
     stepped back so that it ends up as the control head slot index. */
  if (needs_control_head)
    stack++;
  if (!zero)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
  if (needs_control_head)
    {
    stack--;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    }
  }
else
  {
  /* A frame is saved. Slots pushed below, from STACK(0) upwards:
       [flag (unless 'zero')], [control head], [STR_PTR (non-capturing)],
       previous value of private_data_ptr, then the frame itself. */
  stacksize = framesize + 1;
  if (!zero)
    stacksize++;
  if (needs_control_head)
    stacksize++;
  if (offset == 0)
    stacksize++;
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;

  allocate_stack(common, stacksize);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

  stack = 0;
  if (!zero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
    stack = 1;
    }
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    stack++;
    }
  if (offset == 0)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
    stack++;
    }
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
  /* Step 'stack' back over the saved private data slot (and the STR_PTR
     slot when present); it is the index used below to reload the control
     head. */
  stack -= 1 + (offset == 0);
  }

if (offset != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);

loop = LABEL();
while (*cc != OP_KETRPOS)
  {
  backtrack->top = NULL;
  backtrack->topbacktracks = NULL;
  /* Advance cc to the end of the current alternative. */
  cc += GET(cc, 1);

  compile_matchingpath(common, ccbegin, cc, backtrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  /* The alternative matched: record the new position and iterate again. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

    if (offset != 0)
      {
      /* TMP1 receives the start of this iteration; it is also used by the
         empty match check below. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    /* An iteration which did not move STR_PTR leaves the loop. */
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* Clear the flag: at least one iteration has matched. */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
    }
  else
    {
    if (offset != 0)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* Clear the flag. In this (framesize >= 0) layout it lives in STACK(0). */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    }

  JUMPTO(SLJIT_JUMP, loop);
  flush_stubs(common);

  /* The backtracking path of this alternative is emitted inline: when the
     alternative fails, control arrives here. */
  compile_backtrackingpath(common, backtrack->top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;
  set_jumps(backtrack->topbacktracks, LABEL());

  /* Restore STR_PTR to the end of the last successful iteration (or to the
     starting position if there was none). */
  if (framesize < 0)
    {
    if (offset != 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
    else
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    }
  else
    {
    if (offset != 0)
      {
      /* Last alternative. */
      if (*cc == OP_KETRPOS)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      }
    }

  if (*cc == OP_KETRPOS)
    break;
  ccbegin = cc + 1 + LINK_SIZE;
  }

/* We don't have to restore the control head in case of a failed match. */

backtrack->topbacktracks = NULL;
if (!zero)
  {
  /* The flag is still non-zero when no iteration succeeded: backtrack. */
  if (framesize < 0)
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
  else /* TMP2 is set to [private_data_ptr] above. */
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
  }

/* None of them matched. */
set_jumps(emptymatch, LABEL());
count_match(common);
return cc + 1 + LINK_SIZE;
}
9746 
get_iterator_parameters(compiler_common * common,PCRE2_SPTR cc,PCRE2_UCHAR * opcode,PCRE2_UCHAR * type,sljit_u32 * max,sljit_u32 * exact,PCRE2_SPTR * end)9747 static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
9748 {
9749 int class_len;
9750 
9751 *opcode = *cc;
9752 *exact = 0;
9753 
9754 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
9755   {
9756   cc++;
9757   *type = OP_CHAR;
9758   }
9759 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
9760   {
9761   cc++;
9762   *type = OP_CHARI;
9763   *opcode -= OP_STARI - OP_STAR;
9764   }
9765 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
9766   {
9767   cc++;
9768   *type = OP_NOT;
9769   *opcode -= OP_NOTSTAR - OP_STAR;
9770   }
9771 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
9772   {
9773   cc++;
9774   *type = OP_NOTI;
9775   *opcode -= OP_NOTSTARI - OP_STAR;
9776   }
9777 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
9778   {
9779   cc++;
9780   *opcode -= OP_TYPESTAR - OP_STAR;
9781   *type = OP_END;
9782   }
9783 else
9784   {
9785   SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
9786   *type = *opcode;
9787   cc++;
9788   class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
9789   *opcode = cc[class_len - 1];
9790 
9791   if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
9792     {
9793     *opcode -= OP_CRSTAR - OP_STAR;
9794     *end = cc + class_len;
9795 
9796     if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
9797       {
9798       *exact = 1;
9799       *opcode -= OP_PLUS - OP_STAR;
9800       }
9801     }
9802   else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
9803     {
9804     *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
9805     *end = cc + class_len;
9806 
9807     if (*opcode == OP_POSPLUS)
9808       {
9809       *exact = 1;
9810       *opcode = OP_POSSTAR;
9811       }
9812     }
9813   else
9814     {
9815     SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
9816     *max = GET2(cc, (class_len + IMM2_SIZE));
9817     *exact = GET2(cc, class_len);
9818 
9819     if (*max == 0)
9820       {
9821       if (*opcode == OP_CRPOSRANGE)
9822         *opcode = OP_POSSTAR;
9823       else
9824         *opcode -= OP_CRRANGE - OP_STAR;
9825       }
9826     else
9827       {
9828       *max -= *exact;
9829       if (*max == 0)
9830         *opcode = OP_EXACT;
9831       else if (*max == 1)
9832         {
9833         if (*opcode == OP_CRPOSRANGE)
9834           *opcode = OP_POSQUERY;
9835         else
9836           *opcode -= OP_CRRANGE - OP_QUERY;
9837         }
9838       else
9839         {
9840         if (*opcode == OP_CRPOSRANGE)
9841           *opcode = OP_POSUPTO;
9842         else
9843           *opcode -= OP_CRRANGE - OP_UPTO;
9844         }
9845       }
9846     *end = cc + class_len + 2 * IMM2_SIZE;
9847     }
9848   return cc;
9849   }
9850 
9851 switch(*opcode)
9852   {
9853   case OP_EXACT:
9854   *exact = GET2(cc, 0);
9855   cc += IMM2_SIZE;
9856   break;
9857 
9858   case OP_PLUS:
9859   case OP_MINPLUS:
9860   *exact = 1;
9861   *opcode -= OP_PLUS - OP_STAR;
9862   break;
9863 
9864   case OP_POSPLUS:
9865   *exact = 1;
9866   *opcode = OP_POSSTAR;
9867   break;
9868 
9869   case OP_UPTO:
9870   case OP_MINUPTO:
9871   case OP_POSUPTO:
9872   *max = GET2(cc, 0);
9873   cc += IMM2_SIZE;
9874   break;
9875   }
9876 
9877 if (*type == OP_END)
9878   {
9879   *type = *cc;
9880   *end = next_opcode(common, cc);
9881   cc++;
9882   return cc;
9883   }
9884 
9885 *end = cc + 1;
9886 #ifdef SUPPORT_UNICODE
9887 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
9888 #endif
9889 return cc;
9890 }
9891 
compile_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9892 static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9893 {
9894 DEFINE_COMPILER;
9895 backtrack_common *backtrack;
9896 PCRE2_UCHAR opcode;
9897 PCRE2_UCHAR type;
9898 sljit_u32 max = 0, exact;
9899 BOOL fast_fail;
9900 sljit_s32 fast_str_ptr;
9901 BOOL charpos_enabled;
9902 PCRE2_UCHAR charpos_char;
9903 unsigned int charpos_othercasebit;
9904 PCRE2_SPTR end;
9905 jump_list *no_match = NULL;
9906 jump_list *no_char1_match = NULL;
9907 struct sljit_jump *jump = NULL;
9908 struct sljit_label *label;
9909 int private_data_ptr = PRIVATE_DATA(cc);
9910 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
9911 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
9912 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
9913 int tmp_base, tmp_offset;
9914 
9915 PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
9916 
9917 fast_str_ptr = PRIVATE_DATA(cc + 1);
9918 fast_fail = TRUE;
9919 
9920 SLJIT_ASSERT(common->fast_forward_bc_ptr == NULL || fast_str_ptr == 0 || cc == common->fast_forward_bc_ptr);
9921 
9922 if (cc == common->fast_forward_bc_ptr)
9923   fast_fail = FALSE;
9924 else if (common->fast_fail_start_ptr == 0)
9925   fast_str_ptr = 0;
9926 
9927 SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || fast_str_ptr == 0
9928   || (fast_str_ptr >= common->fast_fail_start_ptr && fast_str_ptr <= common->fast_fail_end_ptr));
9929 
9930 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
9931 
9932 if (type != OP_EXTUNI)
9933   {
9934   tmp_base = TMP3;
9935   tmp_offset = 0;
9936   }
9937 else
9938   {
9939   tmp_base = SLJIT_MEM1(SLJIT_SP);
9940   tmp_offset = POSSESSIVE0;
9941   }
9942 
9943 if (fast_fail && fast_str_ptr != 0)
9944   add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr));
9945 
9946 /* Handle fixed part first. */
9947 if (exact > 1)
9948   {
9949   SLJIT_ASSERT(fast_str_ptr == 0);
9950   if (common->mode == PCRE2_JIT_COMPLETE
9951 #ifdef SUPPORT_UNICODE
9952       && !common->utf
9953 #endif
9954       )
9955     {
9956     OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
9957     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
9958     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
9959     label = LABEL();
9960     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
9961     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9962     JUMPTO(SLJIT_NOT_ZERO, label);
9963     }
9964   else
9965     {
9966     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
9967     label = LABEL();
9968     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
9969     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9970     JUMPTO(SLJIT_NOT_ZERO, label);
9971     }
9972   }
9973 else if (exact == 1)
9974   compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
9975 
9976 switch(opcode)
9977   {
9978   case OP_STAR:
9979   case OP_UPTO:
9980   SLJIT_ASSERT(fast_str_ptr == 0 || opcode == OP_STAR);
9981 
9982   if (type == OP_ANYNL || type == OP_EXTUNI)
9983     {
9984     SLJIT_ASSERT(private_data_ptr == 0);
9985     SLJIT_ASSERT(fast_str_ptr == 0);
9986 
9987     allocate_stack(common, 2);
9988     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9989     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9990 
9991     if (opcode == OP_UPTO)
9992       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
9993 
9994     label = LABEL();
9995     compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
9996     if (opcode == OP_UPTO)
9997       {
9998       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
9999       OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
10000       jump = JUMP(SLJIT_ZERO);
10001       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
10002       }
10003 
10004     /* We cannot use TMP3 because of this allocate_stack. */
10005     allocate_stack(common, 1);
10006     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10007     JUMPTO(SLJIT_JUMP, label);
10008     if (jump != NULL)
10009       JUMPHERE(jump);
10010     }
10011   else
10012     {
10013     charpos_enabled = FALSE;
10014     charpos_char = 0;
10015     charpos_othercasebit = 0;
10016 
10017     if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
10018       {
10019       charpos_enabled = TRUE;
10020 #ifdef SUPPORT_UNICODE
10021       charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
10022 #endif
10023       if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
10024         {
10025         charpos_othercasebit = char_get_othercase_bit(common, end + 1);
10026         if (charpos_othercasebit == 0)
10027           charpos_enabled = FALSE;
10028         }
10029 
10030       if (charpos_enabled)
10031         {
10032         charpos_char = end[1];
10033         /* Consume the OP_CHAR opcode. */
10034         end += 2;
10035 #if PCRE2_CODE_UNIT_WIDTH == 8
10036         SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
10037 #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
10038         SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
10039         if ((charpos_othercasebit & 0x100) != 0)
10040           charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
10041 #endif
10042         if (charpos_othercasebit != 0)
10043           charpos_char |= charpos_othercasebit;
10044 
10045         BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
10046         BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
10047         BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
10048         }
10049       }
10050 
10051     if (charpos_enabled)
10052       {
10053       if (opcode == OP_UPTO)
10054         OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
10055 
10056       /* Search the first instance of charpos_char. */
10057       jump = JUMP(SLJIT_JUMP);
10058       label = LABEL();
10059       if (opcode == OP_UPTO)
10060         {
10061         OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
10062         add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
10063         }
10064       compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
10065       if (fast_str_ptr != 0)
10066         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
10067       JUMPHERE(jump);
10068 
10069       detect_partial_match(common, &backtrack->topbacktracks);
10070       OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
10071       if (charpos_othercasebit != 0)
10072         OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
10073       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
10074 
10075       if (private_data_ptr == 0)
10076         allocate_stack(common, 2);
10077       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
10078       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
10079       if (opcode == OP_UPTO)
10080         {
10081         OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
10082         add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
10083         }
10084 
10085       /* Search the last instance of charpos_char. */
10086       label = LABEL();
10087       compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
10088       if (fast_str_ptr != 0)
10089         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
10090       detect_partial_match(common, &no_match);
10091       OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
10092       if (charpos_othercasebit != 0)
10093         OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
10094       if (opcode == OP_STAR)
10095         {
10096         CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
10097         OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
10098         }
10099       else
10100         {
10101         jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
10102         OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
10103         JUMPHERE(jump);
10104         }
10105 
10106       if (opcode == OP_UPTO)
10107         {
10108         OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
10109         JUMPTO(SLJIT_NOT_ZERO, label);
10110         }
10111       else
10112         JUMPTO(SLJIT_JUMP, label);
10113 
10114       set_jumps(no_match, LABEL());
10115       OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
10116       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
10117       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
10118       }
10119 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
10120     else if (common->utf)
10121       {
10122       if (private_data_ptr == 0)
10123         allocate_stack(common, 2);
10124 
10125       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
10126       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
10127 
10128       if (opcode == OP_UPTO)
10129         OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
10130 
10131       label = LABEL();
10132       compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
10133       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
10134 
10135       if (opcode == OP_UPTO)
10136         {
10137         OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
10138         JUMPTO(SLJIT_NOT_ZERO, label);
10139         }
10140       else
10141         JUMPTO(SLJIT_JUMP, label);
10142 
10143       set_jumps(no_match, LABEL());
10144       OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
10145       if (fast_str_ptr != 0)
10146         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
10147       }
10148 #endif
10149     else
10150       {
10151       if (private_data_ptr == 0)
10152         allocate_stack(common, 2);
10153 
10154       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
10155       if (opcode == OP_UPTO)
10156         OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
10157 
10158       label = LABEL();
10159       detect_partial_match(common, &no_match);
10160       compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
10161       if (opcode == OP_UPTO)
10162         {
10163         OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
10164         JUMPTO(SLJIT_NOT_ZERO, label);
10165         OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
10166         }
10167       else
10168         JUMPTO(SLJIT_JUMP, label);
10169 
10170       set_jumps(no_char1_match, LABEL());
10171       OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
10172       set_jumps(no_match, LABEL());
10173       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
10174       if (fast_str_ptr != 0)
10175         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
10176       }
10177     }
10178   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
10179   break;
10180 
10181   case OP_MINSTAR:
10182   if (private_data_ptr == 0)
10183     allocate_stack(common, 1);
10184   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
10185   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
10186   if (fast_str_ptr != 0)
10187     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
10188   break;
10189 
10190   case OP_MINUPTO:
10191   SLJIT_ASSERT(fast_str_ptr == 0);
10192   if (private_data_ptr == 0)
10193     allocate_stack(common, 2);
10194   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
10195   OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
10196   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
10197   break;
10198 
10199   case OP_QUERY:
10200   case OP_MINQUERY:
10201   SLJIT_ASSERT(fast_str_ptr == 0);
10202   if (private_data_ptr == 0)
10203     allocate_stack(common, 1);
10204   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
10205   if (opcode == OP_QUERY)
10206     compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
10207   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
10208   break;
10209 
10210   case OP_EXACT:
10211   break;
10212 
10213   case OP_POSSTAR:
10214 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
10215   if (common->utf)
10216     {
10217     OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
10218     label = LABEL();
10219     compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
10220     OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
10221     JUMPTO(SLJIT_JUMP, label);
10222     set_jumps(no_match, LABEL());
10223     OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
10224     if (fast_str_ptr != 0)
10225       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
10226     break;
10227     }
10228 #endif
10229   label = LABEL();
10230   detect_partial_match(common, &no_match);
10231   compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
10232   JUMPTO(SLJIT_JUMP, label);
10233   set_jumps(no_char1_match, LABEL());
10234   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
10235   set_jumps(no_match, LABEL());
10236   if (fast_str_ptr != 0)
10237     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
10238   break;
10239 
10240   case OP_POSUPTO:
10241   SLJIT_ASSERT(fast_str_ptr == 0);
10242 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
10243   if (common->utf)
10244     {
10245     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
10246     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
10247     label = LABEL();
10248     compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
10249     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
10250     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
10251     JUMPTO(SLJIT_NOT_ZERO, label);
10252     set_jumps(no_match, LABEL());
10253     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
10254     break;
10255     }
10256 #endif
10257   OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
10258   label = LABEL();
10259   detect_partial_match(common, &no_match);
10260   compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
10261   OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
10262   JUMPTO(SLJIT_NOT_ZERO, label);
10263   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
10264   set_jumps(no_char1_match, LABEL());
10265   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
10266   set_jumps(no_match, LABEL());
10267   break;
10268 
10269   case OP_POSQUERY:
10270   SLJIT_ASSERT(fast_str_ptr == 0);
10271   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
10272   compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
10273   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
10274   set_jumps(no_match, LABEL());
10275   OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
10276   break;
10277 
10278   default:
10279   SLJIT_UNREACHABLE();
10280   break;
10281   }
10282 
10283 count_match(common);
10284 return end;
10285 }
10286 
compile_fail_accept_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)10287 static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10288 {
10289 DEFINE_COMPILER;
10290 backtrack_common *backtrack;
10291 
10292 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
10293 
10294 if (*cc == OP_FAIL)
10295   {
10296   add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
10297   return cc + 1;
10298   }
10299 
10300 if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
10301   add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
10302 
10303 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
10304   {
10305   /* No need to check notempty conditions. */
10306   if (common->accept_label == NULL)
10307     add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
10308   else
10309     JUMPTO(SLJIT_JUMP, common->accept_label);
10310   return cc + 1;
10311   }
10312 
10313 if (common->accept_label == NULL)
10314   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
10315 else
10316   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
10317 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10318 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
10319 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
10320 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO));
10321 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
10322 if (common->accept_label == NULL)
10323   add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
10324 else
10325   JUMPTO(SLJIT_ZERO, common->accept_label);
10326 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
10327 if (common->accept_label == NULL)
10328   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
10329 else
10330   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
10331 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
10332 return cc + 1;
10333 }
10334 
compile_close_matchingpath(compiler_common * common,PCRE2_SPTR cc)10335 static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
10336 {
10337 DEFINE_COMPILER;
10338 int offset = GET2(cc, 1);
10339 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
10340 
10341 /* Data will be discarded anyway... */
10342 if (common->currententry != NULL)
10343   return cc + 1 + IMM2_SIZE;
10344 
10345 if (!optimized_cbracket)
10346   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
10347 offset <<= 1;
10348 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10349 if (!optimized_cbracket)
10350   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10351 return cc + 1 + IMM2_SIZE;
10352 }
10353 
compile_control_verb_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)10354 static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10355 {
10356 DEFINE_COMPILER;
10357 backtrack_common *backtrack;
10358 PCRE2_UCHAR opcode = *cc;
10359 PCRE2_SPTR ccend = cc + 1;
10360 
10361 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
10362     opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
10363   ccend += 2 + cc[1];
10364 
10365 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
10366 
10367 if (opcode == OP_SKIP)
10368   {
10369   allocate_stack(common, 1);
10370   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10371   return ccend;
10372   }
10373 
10374 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
10375   {
10376   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10377   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
10378   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
10379   OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
10380   }
10381 
10382 return ccend;
10383 }
10384 
10385 static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
10386 
compile_then_trap_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)10387 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
10388 {
10389 DEFINE_COMPILER;
10390 backtrack_common *backtrack;
10391 BOOL needs_control_head;
10392 int size;
10393 
10394 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
10395 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
10396 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
10397 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
10398 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
10399 
10400 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
10401 size = 3 + (size < 0 ? 0 : size);
10402 
10403 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10404 allocate_stack(common, size);
10405 if (size > 3)
10406   OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
10407 else
10408   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
10409 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
10410 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
10411 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
10412 
10413 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
10414 if (size >= 0)
10415   init_frame(common, cc, ccend, size - 1, 0);
10416 }
10417 
/* Emits the matching path for the opcode sequence [cc, ccend).

Each opcode is dispatched to the routine that generates its code; the routine
returns the position of the next opcode. Opcodes that cannot backtrack on
their own report failure through the current failure list, which is
parent->top->nextbacktracks when parent already has a child record and
parent->topbacktracks otherwise.

When common->has_then is set and common->then_offsets marks the starting
offset, a then-trap record is pushed before the walk (tail item) and a second
one after it (head item); common->then_trap is restored to its previous value
at the end.

The function returns early when a handler yields NULL, when one of the
PUSH_BACKTRACK_NOVALUE allocations bails out, or on an unexpected opcode.

Arguments:
  common   the compiler state
  cc       first opcode to compile
  ccend    end of the sequence (OP_END, or an opcode in OP_ALT..OP_KETRPOS)
  parent   backtrack record that collects the child records */
static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
then_trap_backtrack *outer_then_trap = NULL;
BOOL trap_installed = FALSE;
jump_list **fail_list;
PCRE2_UCHAR repeat_op;

SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));

if (common->has_then && common->then_offsets[cc - common->start] != 0)
  {
  SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
  outer_then_trap = common->then_trap;
  trap_installed = TRUE;
  /* Tail item on backtrack. */
  compile_then_trap_matchingpath(common, cc, ccend, parent);
  }

while (cc < ccend)
  {
  /* None of the cases that use this list pushes a backtrack record before
  using it, so it can be selected once per opcode. */
  fail_list = (parent->top == NULL) ? &parent->topbacktracks : &parent->top->nextbacktracks;

  switch(*cc)
    {
    /* Simple assertions. */
    case OP_SOD: case OP_SOM:
    case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
    case OP_EODN: case OP_EOD:
    case OP_DOLL: case OP_DOLLM:
    case OP_CIRC: case OP_CIRCM:
    case OP_REVERSE:
    cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, fail_list);
    break;

    /* Single character types. */
    case OP_NOT_DIGIT: case OP_DIGIT:
    case OP_NOT_WHITESPACE: case OP_WHITESPACE:
    case OP_NOT_WORDCHAR: case OP_WORDCHAR:
    case OP_ANY: case OP_ALLANY: case OP_ANYBYTE:
    case OP_NOTPROP: case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE: case OP_HSPACE:
    case OP_NOT_VSPACE: case OP_VSPACE:
    case OP_EXTUNI:
    case OP_NOT: case OP_NOTI:
    cc = compile_char1_matchingpath(common, *cc, cc + 1, fail_list, TRUE);
    break;

    case OP_SET_SOM:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    /* The old OVECTOR(0) value goes to the stack, STR_PTR replaces it. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    cc++;
    break;

    case OP_CHAR: case OP_CHARI:
    if (common->mode != PCRE2_JIT_COMPLETE)
      cc = compile_char1_matchingpath(common, *cc, cc + 1, fail_list, TRUE);
    else
      cc = compile_charn_matchingpath(common, cc, ccend, fail_list);
    break;

    /* Single character iterators. */
    case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:

    case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:

    case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:

    case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:

    case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
    cc = compile_iterator_matchingpath(common, cc, parent);
    break;

    case OP_CLASS: case OP_NCLASS:
    /* The opcode after the 32-byte bitmap tells whether the class is repeated. */
    repeat_op = cc[1 + (32 / sizeof(PCRE2_UCHAR))];
    if (repeat_op < OP_CRSTAR || repeat_op > OP_CRPOSRANGE)
      cc = compile_char1_matchingpath(common, *cc, cc + 1, fail_list, TRUE);
    else
      cc = compile_iterator_matchingpath(common, cc, parent);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    case OP_XCLASS:
    repeat_op = *(cc + GET(cc, 1));
    if (repeat_op < OP_CRSTAR || repeat_op > OP_CRPOSRANGE)
      cc = compile_char1_matchingpath(common, *cc, cc + 1, fail_list, TRUE);
    else
      cc = compile_iterator_matchingpath(common, cc, parent);
    break;
#endif

    case OP_REF: case OP_REFI:
    repeat_op = cc[1 + IMM2_SIZE];
    if (repeat_op < OP_CRSTAR || repeat_op > OP_CRPOSRANGE)
      {
      compile_ref_matchingpath(common, cc, fail_list, TRUE, FALSE);
      cc += 1 + IMM2_SIZE;
      }
    else
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    break;

    case OP_DNREF: case OP_DNREFI:
    repeat_op = cc[1 + 2 * IMM2_SIZE];
    if (repeat_op < OP_CRSTAR || repeat_op > OP_CRPOSRANGE)
      {
      compile_dnref_search(common, cc, fail_list);
      compile_ref_matchingpath(common, cc, fail_list, TRUE, FALSE);
      cc += 1 + 2 * IMM2_SIZE;
      }
    else
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    break;

    case OP_RECURSE:
    cc = compile_recurse_matchingpath(common, cc, parent);
    break;

    case OP_CALLOUT: case OP_CALLOUT_STR:
    cc = compile_callout_matchingpath(common, cc, parent);
    break;

    case OP_ASSERT: case OP_ASSERT_NOT:
    case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
    PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
    cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
    break;

    case OP_BRAMINZERO:
    PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
    cc = bracketend(cc + 1);
    if (*(cc - 1 - LINK_SIZE) == OP_KETRMIN)
      {
      /* OP_KETRMIN: two slots, a zero followed by STR_PTR. */
      allocate_stack(common, 2);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
      }
    else
      {
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
    count_match(common);
    break;

    case OP_ONCE:
    case OP_BRA: case OP_CBRA: case OP_COND:
    case OP_SBRA: case OP_SCBRA: case OP_SCOND:
    cc = compile_bracket_matchingpath(common, cc, parent);
    break;

    case OP_BRAZERO:
    if (cc[1] <= OP_ASSERTBACK_NOT)
      {
      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
      }
    else
      cc = compile_bracket_matchingpath(common, cc, parent);
    break;

    case OP_BRAPOS: case OP_CBRAPOS:
    case OP_SBRAPOS: case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    cc = compile_bracketpos_matchingpath(common, cc, parent);
    break;

    case OP_MARK:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    SLJIT_ASSERT(common->mark_ptr != 0);
    /* Save the previous mark on the stack, then publish cc + 2 as the new one. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
    allocate_stack(common, common->has_skip_arg ? 5 : 1);
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
    if (common->has_skip_arg)
      {
      /* Link a type_mark entry into the control head chain. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE: case OP_PRUNE_ARG:
    case OP_SKIP: case OP_SKIP_ARG:
    case OP_THEN: case OP_THEN_ARG:
    case OP_COMMIT: case OP_COMMIT_ARG:
    cc = compile_control_verb_matchingpath(common, cc, parent);
    break;

    case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT:
    cc = compile_fail_accept_matchingpath(common, cc, parent);
    break;

    case OP_CLOSE:
    cc = compile_close_matchingpath(common, cc);
    break;

    case OP_SKIPZERO:
    cc = bracketend(cc + 1);
    break;

    default:
    SLJIT_UNREACHABLE();
    return;
    }

  /* A handler gave up. */
  if (cc == NULL)
    return;
  }

if (trap_installed)
  {
  /* Head item on backtrack. */
  PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
  BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
  BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
  common->then_trap = outer_then_trap;
  }
SLJIT_ASSERT(cc == ccend);
}
10729 
/* The backtrack-record helper macros are removed from here on. */
#undef BACKTRACK_AS
#undef PUSH_BACKTRACK_NOVALUE
#undef PUSH_BACKTRACK

/* Compiles the backtracking path of one backtrack record and returns from the
enclosing (void) function as soon as the sljit compiler reports an error.
Expects `common` and `compiler` to be in scope at the point of use. */
#define COMPILE_BACKTRACKINGPATH(node) \
  do \
    { \
    compile_backtrackingpath(common, (node)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Views the variable `current` as a pointer to the given backtrack type. */
#define CURRENT_AS(backtrack_type) ((backtrack_type *)current)
10744 
/* Emits the backtracking path of a single character iterator.

The iterator keeps up to two machine words of state. When PRIVATE_DATA(cc)
is zero they live in the two topmost stack slots; otherwise they live at
private_data_ptr relative to SLJIT_SP. Stack slots are released at the end
of the opcode specific code, private data slots are never released here.

Arguments:
  common      the compiler state
  current     the backtrack record (a char_iterator_backtrack)
*/
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
char_iterator_backtrack *iter = CURRENT_AS(char_iterator_backtrack);
PCRE2_SPTR cc = current->cc;
PCRE2_UCHAR opcode;
PCRE2_UCHAR type;
sljit_u32 max = 0, exact;
PCRE2_SPTR end;
struct sljit_label *retry = NULL;
struct sljit_jump *exhausted = NULL;
jump_list *no_match = NULL;
int private_data_ptr = PRIVATE_DATA(cc);
int slot_base, slot0, slot1;
/* Number of stack slots to drop after the switch (stack based state only). */
int pop_count = 0;

if (private_data_ptr == 0)
  {
  slot_base = SLJIT_MEM1(STACK_TOP);
  slot0 = STACK(0);
  slot1 = STACK(1);
  }
else
  {
  slot_base = SLJIT_MEM1(SLJIT_SP);
  slot0 = private_data_ptr;
  slot1 = private_data_ptr + (int)sizeof(sljit_sw);
  }

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

switch(opcode)
  {
  case OP_STAR:
  case OP_UPTO:
  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    /* These types always use the stack: pop one saved STR_PTR and resume
    on the matching path unless the popped value is zero. */
    SLJIT_ASSERT(private_data_ptr == 0);
    set_jumps(iter->u.backtracks, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iter->matchingpath);
    break;
    }

  if (iter->u.charpos.enabled)
    {
    /* Step backwards from slot 0 towards the limit in slot 1, resuming
    only where the preceding code unit equals the recorded character. */
    OP1(SLJIT_MOV, STR_PTR, 0, slot_base, slot0);
    OP1(SLJIT_MOV, TMP2, 0, slot_base, slot1);
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

    exhausted = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
    retry = LABEL();
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(SLJIT_MOV, slot_base, slot0, STR_PTR, 0);
    if (iter->u.charpos.othercasebit != 0)
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, iter->u.charpos.othercasebit);
    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, iter->u.charpos.chr, iter->matchingpath);
    skip_char_back(common);
    CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, retry);
    }
  else
    {
    /* Give back one character, provided slot 0 is still above slot 1. */
    OP1(SLJIT_MOV, STR_PTR, 0, slot_base, slot0);
    exhausted = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, slot_base, slot1);
    skip_char_back(common);
    OP1(SLJIT_MOV, slot_base, slot0, STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, iter->matchingpath);
    }
  JUMPHERE(exhausted);
  pop_count = 2;
  break;

  case OP_MINSTAR:
  /* Try to consume one more character from the position in slot 0. */
  OP1(SLJIT_MOV, STR_PTR, 0, slot_base, slot0);
  compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
  OP1(SLJIT_MOV, slot_base, slot0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  set_jumps(no_match, LABEL());
  pop_count = 1;
  break;

  case OP_MINUPTO:
  /* Slot 1 holds a counter: decrement it and give up when it hits zero,
  otherwise consume one more character from the position in slot 0. */
  OP1(SLJIT_MOV, TMP1, 0, slot_base, slot1);
  OP1(SLJIT_MOV, STR_PTR, 0, slot_base, slot0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));

  OP1(SLJIT_MOV, slot_base, slot1, TMP1, 0);
  compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
  OP1(SLJIT_MOV, slot_base, slot0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);

  set_jumps(no_match, LABEL());
  pop_count = 2;
  break;

  case OP_QUERY:
  /* Slot 0 is read and cleared; a non-zero value resumes matching. The
  recorded backtracks enter at the second read-and-clear sequence. */
  OP1(SLJIT_MOV, STR_PTR, 0, slot_base, slot0);
  OP1(SLJIT_MOV, slot_base, slot0, SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iter->matchingpath);
  exhausted = JUMP(SLJIT_JUMP);
  set_jumps(iter->u.backtracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, slot_base, slot0);
  OP1(SLJIT_MOV, slot_base, slot0, SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  JUMPHERE(exhausted);
  pop_count = 1;
  break;

  case OP_MINQUERY:
  /* Slot 0 is read and cleared; a zero value means nothing is left to
  try, otherwise one character is matched from the saved position. */
  OP1(SLJIT_MOV, STR_PTR, 0, slot_base, slot0);
  OP1(SLJIT_MOV, slot_base, slot0, SLJIT_IMM, 0);
  exhausted = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  set_jumps(no_match, LABEL());
  JUMPHERE(exhausted);
  pop_count = 1;
  break;

  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSQUERY:
  case OP_POSUPTO:
  /* No opcode specific backtracking code is emitted for these. */
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

/* Release the iterator state when it was kept on the stack. */
if (private_data_ptr == 0 && pop_count > 0)
  free_stack(common, pop_count);

set_jumps(current->topbacktracks, LABEL());
}
10872 
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)10873 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10874 {
10875 DEFINE_COMPILER;
10876 PCRE2_SPTR cc = current->cc;
10877 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
10878 PCRE2_UCHAR type;
10879 
10880 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
10881 
10882 if ((type & 0x1) == 0)
10883   {
10884   /* Maximize case. */
10885   set_jumps(current->topbacktracks, LABEL());
10886   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10887   free_stack(common, 1);
10888   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
10889   return;
10890   }
10891 
10892 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10893 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
10894 set_jumps(current->topbacktracks, LABEL());
10895 free_stack(common, ref ? 2 : 3);
10896 }
10897 
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)10898 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10899 {
10900 DEFINE_COMPILER;
10901 recurse_entry *entry;
10902 
10903 if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
10904   {
10905   entry = CURRENT_AS(recurse_backtrack)->entry;
10906   if (entry->backtrack_label == NULL)
10907     add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
10908   else
10909     JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
10910   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
10911   }
10912 else
10913   compile_backtrackingpath(common, current->top);
10914 
10915 set_jumps(current->topbacktracks, LABEL());
10916 }
10917 
/* Emits the backtracking path of an assertion, optionally preceded by
OP_BRAZERO (OP_BRAMINZERO is never passed to this function).

Arguments:
  common      the compiler state
  current     the backtrack record (an assert_backtrack)
*/
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
assert_backtrack *info = CURRENT_AS(assert_backtrack);
PCRE2_SPTR cc = current->cc;
BOOL zero_prefix = FALSE;
struct sljit_jump *skip_zero = NULL;

SLJIT_ASSERT(*cc != OP_BRAMINZERO);
if (*cc == OP_BRAZERO)
  {
  zero_prefix = TRUE;
  cc++;
  SLJIT_ASSERT(current->topbacktracks == NULL);
  /* The value saved on the top of the stack goes to STR_PTR. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

if (info->framesize < 0)
  {
  /* No frame to revert. */
  set_jumps(current->topbacktracks, LABEL());

  if (zero_prefix)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, info->matchingpath);
    free_stack(common, 1);
    }
  return;
  }

if (zero_prefix)
  {
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, info->matchingpath);
    free_stack(common, 1);
    return;
    }
  free_stack(common, 1);
  skip_zero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

switch (*cc)
  {
  case OP_ASSERT:
  case OP_ASSERTBACK:
  /* Revert the frame referenced by the private data slot, then store the
  word loaded from STACK(-2) back into that slot. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), info->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (info->framesize - 1) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), info->private_data_ptr, TMP1, 0);
  break;

  default:
  break;
  }

set_jumps(current->topbacktracks, LABEL());

if (!zero_prefix)
  return;

/* There is known to be enough room on the stack for one more slot. */
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
JUMPTO(SLJIT_JUMP, info->matchingpath);
JUMPHERE(skip_zero);
}
10986 
/* Emits the backtracking path of a bracket group.

The group is described by "current", which is viewed as a bracket_backtrack
throughout. The code below deals, in this order, with: an optional leading
OP_BRAZERO / OP_BRAMINZERO, the kind of closing ket (including bounded
repeats recorded in the private data of a plain OP_KET), restoring the
capture data of OP_CBRA / OP_SCBRA, OP_ONCE frames, conditional groups,
dispatching to and compiling the remaining alternatives, and finally the
stack clean-up and the loop-back jumps of repeated groups.

The function returns early, leaving the emitted code incomplete, when the
read-only jump table cannot be allocated or when the sljit compiler reports
an error after compiling a nested path.

Arguments:
  common      the compiler state
  current     the backtrack record (a bracket_backtrack)
*/
static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
int opcode, stacksize, alt_count, alt_max;
int offset = 0;
int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
PCRE2_SPTR cc = current->cc;
PCRE2_SPTR ccbegin;
PCRE2_SPTR ccprev;
PCRE2_UCHAR bra = OP_BRA;
PCRE2_UCHAR ket;
assert_backtrack *assert;
sljit_uw *next_update_addr = NULL;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
struct sljit_jump *brazero = NULL;
struct sljit_jump *alt1 = NULL;
struct sljit_jump *alt2 = NULL;
struct sljit_jump *once = NULL;
struct sljit_jump *cond = NULL;
struct sljit_label *rmin_label = NULL;
struct sljit_label *exact_label = NULL;

/* Step over an optional zero-repeat prefix and remember it in "bra". */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  }

opcode = *cc;
/* ccbegin temporarily addresses the opcode 1 + LINK_SIZE code units before
the position returned by bracketend(); it is read as the closing ket. */
ccbegin = bracketend(cc) - 1 - LINK_SIZE;
ket = *ccbegin;
/* A plain OP_KET with non-zero private data carries a bounded repeat.
OP_UPTO and OP_MINUPTO repeats are handled like OP_KETRMAX / OP_KETRMIN. */
if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
  {
  repeat_ptr = PRIVATE_DATA(ccbegin);
  repeat_type = PRIVATE_DATA(ccbegin + 2);
  repeat_count = PRIVATE_DATA(ccbegin + 3);
  SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }
/* From here on ccbegin is the bracket opcode and cc the end of its first
branch (an OP_ALT or the ket). */
ccbegin = cc;
cc += GET(cc, 1);
has_alternatives = *cc == OP_ALT;
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
/* "offset" is twice the 2-byte value stored after the link field; it is
used as an OVECTOR index below. */
if (opcode == OP_CBRA || opcode == OP_SCBRA)
  offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
/* An OP_COND whose first branch ends in OP_KETRMAX / OP_KETRMIN is treated
as OP_SCOND for the rest of this function. */
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;

alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;

/* Decoding the needs_control_head flag stored in the lowest bit of
framesize; the remaining bits are the real frame size. */
if (opcode == OP_ONCE)
  {
  needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
  CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
  }

/* Bounded OP_UPTO / OP_MINUPTO repeat: pop the saved counter and write it
back to the repeat slot (plus one for OP_UPTO). */
if (ket != OP_KET && repeat_type != 0)
  {
  /* TMP1 is used in OP_KETRMIN below. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  if (repeat_type == OP_UPTO)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  }

if (ket == OP_KETRMAX)
  {
  if (bra == OP_BRAZERO)
    {
    /* A zero word on the top of the stack skips to the "brazero" landing
    point near the end of this function. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
    }
  }
else if (ket == OP_KETRMIN)
  {
  if (bra != OP_BRAMINZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    if (repeat_type != 0)
      {
      /* TMP1 was set a few lines above. */
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
      {
      /* Checking zero-length iteration. */
      if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
        }
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else
      JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
    }
  /* Target of the loop-back jump emitted in the OP_KETRMIN tail below. */
  rmin_label = LABEL();
  if (repeat_type != 0)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  }
else if (repeat_type == OP_EXACT)
  {
  /* The counter restarts at 1; exact_label is the target of the loop-back
  jump emitted in the OP_EXACT tail below. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  exact_label = LABEL();
  }

/* Capturing group: restore the saved OVECTOR pair (and capture_last_ptr
when it is in use) from the stack. Nothing is restored here for optimized
capturing brackets when capture_last_ptr is not used. */
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    {
    SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
    free_stack(common, 3);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
    }
  else if (common->optimized_cbracket[offset >> 1] == 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  }

if (SLJIT_UNLIKELY(opcode == OP_ONCE))
  {
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    /* Revert the frame referenced by the private data slot. */
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
    }
  /* Jumps over everything up to the JUMPHERE(once) in the OP_ONCE clean-up
  code further below. */
  once = JUMP(SLJIT_JUMP);
  }
else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  if (has_alternatives)
    {
    /* Always exactly one alternative. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);

    alt_max = 2;
    alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
    }
  }
else if (has_alternatives)
  {
  /* The popped word selects the alternative; its values are multiples of
  sizeof(sljit_uw) (see the alt_count stores in the loop below). */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);

  if (alt_max > 4)
    {
    /* Table jump if alt_max is greater than 4. */
    next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
    if (SLJIT_UNLIKELY(next_update_addr == NULL))
      return;
    sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
    add_label_addr(common, next_update_addr++);
    }
  else
    {
    /* Compare-and-jump dispatch; alt1 / alt2 are resolved inside the
    alternative loop below. */
    if (alt_max == 4)
      alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
    alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
    }
  }

/* Backtracking path of the items recorded under this bracket so far. */
COMPILE_BACKTRACKINGPATH(current->top);
if (current->topbacktracks)
  set_jumps(current->topbacktracks, LABEL());

if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  /* Conditional block always has at most one alternative. */
  if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
    {
    /* The condition is an assertion. */
    SLJIT_ASSERT(has_alternatives);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
      }
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
    }
  else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
    {
    SLJIT_ASSERT(has_alternatives);
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
    }
  else
    SLJIT_ASSERT(!has_alternatives);
  }

if (has_alternatives)
  {
  /* alt_count is the selector value stored for the alternative being
  compiled; it advances by sizeof(sljit_uw) per iteration, except for
  OP_ONCE where no selector is stored. */
  alt_count = sizeof(sljit_uw);
  do
    {
    current->top = NULL;
    current->topbacktracks = NULL;
    current->nextbacktracks = NULL;
    /* Conditional blocks always have an additional alternative, even if it is empty. */
    if (*cc == OP_ALT)
      {
      ccprev = cc + 1 + LINK_SIZE;
      cc += GET(cc, 1);
      /* Reload the subject position at which the next alternative starts. */
      if (opcode != OP_COND && opcode != OP_SCOND)
        {
        if (opcode != OP_ONCE)
          {
          if (private_data_ptr != 0)
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
          else
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
          }
        else
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
        }
      compile_matchingpath(common, ccprev, cc, current);
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
        return;
      }

    /* Instructions executed after the current alternative has matched
    successfully. There is similar code in compile_bracket_matchingpath. */
    if (opcode == OP_ONCE)
      match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

    /* First pass: count the stack slots needed by this alternative. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* We need to preserve the counter. TMP2 will be used below. */
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
      stacksize++;
      }
    if (ket != OP_KET || bra != OP_BRA)
      stacksize++;
    if (offset != 0)
      {
      if (common->capture_last_ptr != 0)
        stacksize++;
      if (common->optimized_cbracket[offset >> 1] == 0)
        stacksize += 2;
      }
    if (opcode != OP_ONCE)
      stacksize++;

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    /* Second pass: fill the allocated slots; stacksize is reused as the
    index of the next slot to write. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* TMP2 was set above. */
      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
      stacksize++;
      }

    if (ket != OP_KET || bra != OP_BRA)
      {
      if (ket != OP_KET)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      else
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
      stacksize++;
      }

    if (offset != 0)
      stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

    /* Record which alternative matched so that the dispatch code above can
    continue with the next one on backtrack. */
    if (opcode != OP_ONCE)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);

    if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
      {
      /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
      SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      }

    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);

    /* Backtrack entry point of this alternative: resolve the dispatch jump
    (or jump table entry) that selects it. */
    if (opcode != OP_ONCE)
      {
      if (alt_max > 4)
        add_label_addr(common, next_update_addr++);
      else
        {
        if (alt_count != 2 * sizeof(sljit_uw))
          {
          JUMPHERE(alt1);
          if (alt_max == 3 && alt_count == sizeof(sljit_uw))
            alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
          }
        else
          {
          JUMPHERE(alt2);
          if (alt_max == 4)
            alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
          }
        }
      alt_count += sizeof(sljit_uw);
      }

    COMPILE_BACKTRACKINGPATH(current->top);
    if (current->topbacktracks)
      set_jumps(current->topbacktracks, LABEL());
    SLJIT_ASSERT(!current->nextbacktracks);
    }
  while (*cc == OP_ALT);

  if (cond != NULL)
    {
    SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    /* Negative assertion condition with a frame: revert that frame. */
    if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
      }
    JUMPHERE(cond);
    }

  /* Free the STR_PTR. */
  if (private_data_ptr == 0)
    free_stack(common, 1);
  }

/* Every alternative has failed: undo what the bracket start saved. */
if (offset != 0)
  {
  /* Using both tmp register is better for instruction scheduling. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }
else if (opcode == OP_SBRA || opcode == OP_SCOND)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  }
else if (opcode == OP_ONCE)
  {
  cc = ccbegin + GET(ccbegin, 1);
  stacksize = needs_control_head ? 1 : 0;

  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    /* Reset head and drop saved frame. */
    stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
    }
  else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
    {
    /* The STR_PTR must be released. */
    stacksize++;
    }

  if (stacksize > 0)
    free_stack(common, stacksize);

  /* Landing point of the "once" jump emitted before the alternatives. */
  JUMPHERE(once);
  /* Restore previous private_data_ptr */
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
  else if (ket == OP_KETRMIN)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    /* See the comment in the OP_KETRMIN branch below. */
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }

/* Tail: loop back for repeated groups, or continue on the zero path. */
if (repeat_type == OP_EXACT)
  {
  /* Increment the counter and loop back while it is <= repeat_count. */
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
  }
else if (ket == OP_KETRMAX)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (bra != OP_BRAZERO)
    free_stack(common, 1);

  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
  if (bra == OP_BRAZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
    JUMPHERE(brazero);
    free_stack(common, 1);
    }
  }
else if (ket == OP_KETRMIN)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  /* OP_ONCE removes everything in case of a backtrack, so we don't
  need to explicitly release the STR_PTR. The extra release would
  badly affect the free_stack(2) above. */
  if (opcode != OP_ONCE)
    free_stack(common, 1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
  if (opcode == OP_ONCE)
    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
  else if (bra == OP_BRAMINZERO)
    free_stack(common, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
  JUMPHERE(brazero);
  }
}
11451 
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)11452 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
11453 {
11454 DEFINE_COMPILER;
11455 int offset;
11456 struct sljit_jump *jump;
11457 
11458 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
11459   {
11460   if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
11461     {
11462     offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
11463     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11464     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
11465     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11466     if (common->capture_last_ptr != 0)
11467       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
11468     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
11469     if (common->capture_last_ptr != 0)
11470       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
11471     }
11472   set_jumps(current->topbacktracks, LABEL());
11473   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
11474   return;
11475   }
11476 
11477 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
11478 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
11479 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
11480 
11481 if (current->topbacktracks)
11482   {
11483   jump = JUMP(SLJIT_JUMP);
11484   set_jumps(current->topbacktracks, LABEL());
11485   /* Drop the stack frame. */
11486   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
11487   JUMPHERE(jump);
11488   }
11489 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
11490 }
11491 
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)11492 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
11493 {
11494 assert_backtrack backtrack;
11495 
11496 current->top = NULL;
11497 current->topbacktracks = NULL;
11498 current->nextbacktracks = NULL;
11499 if (current->cc[1] > OP_ASSERTBACK_NOT)
11500   {
11501   /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
11502   compile_bracket_matchingpath(common, current->cc, current);
11503   compile_bracket_backtrackingpath(common, current->top);
11504   }
11505 else
11506   {
11507   memset(&backtrack, 0, sizeof(backtrack));
11508   backtrack.common.cc = current->cc;
11509   backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
11510   /* Manual call of compile_assert_matchingpath. */
11511   compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
11512   }
11513 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
11514 }
11515 
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)11516 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
11517 {
11518 DEFINE_COMPILER;
11519 PCRE2_UCHAR opcode = *current->cc;
11520 struct sljit_label *loop;
11521 struct sljit_jump *jump;
11522 
11523 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
11524   {
11525   if (common->then_trap != NULL)
11526     {
11527     SLJIT_ASSERT(common->control_head_ptr != 0);
11528 
11529     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11530     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
11531     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
11532     jump = JUMP(SLJIT_JUMP);
11533 
11534     loop = LABEL();
11535     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11536     JUMPHERE(jump);
11537     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
11538     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
11539     add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
11540     return;
11541     }
11542   else if (!common->local_quit_available && common->in_positive_assertion)
11543     {
11544     add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
11545     return;
11546     }
11547   }
11548 
11549 if (common->local_quit_available)
11550   {
11551   /* Abort match with a fail. */
11552   if (common->quit_label == NULL)
11553     add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
11554   else
11555     JUMPTO(SLJIT_JUMP, common->quit_label);
11556   return;
11557   }
11558 
11559 if (opcode == OP_SKIP_ARG)
11560   {
11561   SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
11562   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11563   OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
11564   sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
11565 
11566   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
11567   add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
11568   return;
11569   }
11570 
11571 if (opcode == OP_SKIP)
11572   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11573 else
11574   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
11575 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
11576 }
11577 
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)11578 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
11579 {
11580 DEFINE_COMPILER;
11581 struct sljit_jump *jump;
11582 int size;
11583 
11584 if (CURRENT_AS(then_trap_backtrack)->then_trap)
11585   {
11586   common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
11587   return;
11588   }
11589 
11590 size = CURRENT_AS(then_trap_backtrack)->framesize;
11591 size = 3 + (size < 0 ? 0 : size);
11592 
11593 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
11594 free_stack(common, size);
11595 jump = JUMP(SLJIT_JUMP);
11596 
11597 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
11598 /* STACK_TOP is set by THEN. */
11599 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
11600   {
11601   add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
11602   OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
11603   }
11604 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11605 free_stack(common, 3);
11606 
11607 JUMPHERE(jump);
11608 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
11609 }
11610 
/* Emits the backtracking code for a chain of backtrack records.

Arguments:
  common    the shared compiler state
  current   the first record to process; the chain is followed through
            the prev links until it ends

For every record the pending nextbacktracks jumps are bound to the current
position, then the opcode at the record's cc selects the generator. The value
of common->then_trap seen on entry is put back before returning, because the
then-trap generator may replace it while the chain is processed. */
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *outer_then_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->nextbacktracks != NULL)
    set_jumps(current->nextbacktracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Pop one stack slot and write it back into OVECTOR(0). */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Single character, negated character, character type and class
    repetitions share one generator. */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF:
    case OP_REFI:
    case OP_DNREF:
    case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ONCE:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode after BRAZERO decides the generator: values up to
    OP_ASSERTBACK_NOT go to the assertion generator, larger ones to the
    bracket generator. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    if (common->has_skip_arg)
      {
      /* Five slots are released: slot 4 is written to mark_ptr and slot 0
      to the control head. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      /* A single slot is released and written to mark_ptr. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN:
    case OP_THEN_ARG:
    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT:
    case OP_COMMIT_ARG:
    /* Without a local quit the return register is loaded with the no-match
    code before leaving. */
    if (!common->local_quit_available)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
    if (common->quit_label != NULL)
      JUMPTO(SLJIT_JUMP, common->quit_label);
    else
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    break;

    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    /* Nothing to undo: only bind the record's own backtrack jumps. */
    set_jumps(current->topbacktracks, LABEL());
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = outer_then_trap;
}
11798 
compile_recurse(compiler_common * common)11799 static SLJIT_INLINE void compile_recurse(compiler_common *common)
11800 {
11801 DEFINE_COMPILER;
11802 PCRE2_SPTR cc = common->start + common->currententry->start;
11803 PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
11804 PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
11805 BOOL needs_control_head;
11806 BOOL has_quit;
11807 BOOL has_accept;
11808 int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head, &has_quit, &has_accept);
11809 int alt_count, alt_max, local_size;
11810 backtrack_common altbacktrack;
11811 jump_list *match = NULL;
11812 sljit_uw *next_update_addr = NULL;
11813 struct sljit_jump *alt1 = NULL;
11814 struct sljit_jump *alt2 = NULL;
11815 struct sljit_jump *accept_exit = NULL;
11816 struct sljit_label *quit;
11817 
11818 /* Recurse captures then. */
11819 common->then_trap = NULL;
11820 
11821 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
11822 
11823 alt_max = no_alternatives(cc);
11824 alt_count = 0;
11825 
11826 /* Matching path. */
11827 SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
11828 common->currententry->entry_label = LABEL();
11829 set_jumps(common->currententry->entry_calls, common->currententry->entry_label);
11830 
11831 sljit_emit_fast_enter(compiler, TMP2, 0);
11832 count_match(common);
11833 
11834 local_size = (alt_max > 1) ? 2 : 1;
11835 
11836 /* (Reversed) stack layout:
11837    [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */
11838 
11839 allocate_stack(common, private_data_size + local_size);
11840 /* Save return address. */
11841 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);
11842 
11843 copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, has_quit);
11844 
11845 /* This variable is saved and restored all time when we enter or exit from a recursive context. */
11846 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
11847 
11848 if (needs_control_head)
11849   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
11850 
11851 if (alt_max > 1)
11852   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11853 
11854 memset(&altbacktrack, 0, sizeof(backtrack_common));
11855 common->quit_label = NULL;
11856 common->accept_label = NULL;
11857 common->quit = NULL;
11858 common->accept = NULL;
11859 altbacktrack.cc = ccbegin;
11860 cc += GET(cc, 1);
11861 while (1)
11862   {
11863   altbacktrack.top = NULL;
11864   altbacktrack.topbacktracks = NULL;
11865 
11866   if (altbacktrack.cc != ccbegin)
11867     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11868 
11869   compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
11870   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11871     return;
11872 
11873   allocate_stack(common, (alt_max > 1 || has_accept) ? 2 : 1);
11874   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
11875 
11876   if (alt_max > 1 || has_accept)
11877     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
11878 
11879   add_jump(compiler, &match, JUMP(SLJIT_JUMP));
11880 
11881   if (alt_count == 0)
11882     {
11883     /* Backtracking path entry. */
11884     SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
11885     common->currententry->backtrack_label = LABEL();
11886     set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);
11887 
11888     sljit_emit_fast_enter(compiler, TMP1, 0);
11889 
11890     if (has_accept)
11891       accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_max * sizeof (sljit_sw));
11892 
11893     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11894     /* Save return address. */
11895     OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);
11896 
11897     copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
11898 
11899     if (alt_max > 1)
11900       {
11901       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
11902       free_stack(common, 2);
11903 
11904       if (alt_max > 4)
11905         {
11906           /* Table jump if alt_max is greater than 4. */
11907           next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
11908           if (SLJIT_UNLIKELY(next_update_addr == NULL))
11909             return;
11910           sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
11911           add_label_addr(common, next_update_addr++);
11912         }
11913       else
11914         {
11915         if (alt_max == 4)
11916           alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
11917         alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
11918         }
11919       }
11920     else
11921       free_stack(common, has_accept ? 2 : 1);
11922     }
11923   else if (alt_max > 4)
11924     add_label_addr(common, next_update_addr++);
11925   else
11926     {
11927     if (alt_count != 2 * sizeof(sljit_uw))
11928       {
11929       JUMPHERE(alt1);
11930       if (alt_max == 3 && alt_count == sizeof(sljit_uw))
11931         alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
11932       }
11933     else
11934       {
11935       JUMPHERE(alt2);
11936       if (alt_max == 4)
11937         alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
11938       }
11939     }
11940 
11941   alt_count += sizeof(sljit_uw);
11942 
11943   compile_backtrackingpath(common, altbacktrack.top);
11944   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11945     return;
11946   set_jumps(altbacktrack.topbacktracks, LABEL());
11947 
11948   if (*cc != OP_ALT)
11949     break;
11950 
11951   altbacktrack.cc = cc + 1 + LINK_SIZE;
11952   cc += GET(cc, 1);
11953   }
11954 
11955 /* No alternative is matched. */
11956 
11957 quit = LABEL();
11958 
11959 copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, has_quit);
11960 
11961 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
11962 free_stack(common, private_data_size + local_size);
11963 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
11964 sljit_emit_fast_return(compiler, TMP2, 0);
11965 
11966 if (common->quit != NULL)
11967   {
11968   SLJIT_ASSERT(has_quit);
11969 
11970   set_jumps(common->quit, LABEL());
11971   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
11972   copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, has_quit);
11973   JUMPTO(SLJIT_JUMP, quit);
11974   }
11975 
11976 if (has_accept)
11977   {
11978   JUMPHERE(accept_exit);
11979   free_stack(common, 2);
11980 
11981   /* Save return address. */
11982   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);
11983 
11984   copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, has_quit);
11985 
11986   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
11987   free_stack(common, private_data_size + local_size);
11988   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
11989   sljit_emit_fast_return(compiler, TMP2, 0);
11990   }
11991 
11992 if (common->accept != NULL)
11993   {
11994   SLJIT_ASSERT(has_accept);
11995 
11996   set_jumps(common->accept, LABEL());
11997 
11998   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
11999   OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
12000 
12001   allocate_stack(common, 2);
12002   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
12003   }
12004 
12005 set_jumps(match, LABEL());
12006 
12007 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
12008 
12009 copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
12010 
12011 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
12012 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
12013 sljit_emit_fast_return(compiler, TMP2, 0);
12014 }
12015 
12016 #undef COMPILE_BACKTRACKINGPATH
12017 #undef CURRENT_AS
12018 
jit_compile(pcre2_code * code,sljit_u32 mode)12019 static int jit_compile(pcre2_code *code, sljit_u32 mode)
12020 {
12021 pcre2_real_code *re = (pcre2_real_code *)code;
12022 struct sljit_compiler *compiler;
12023 backtrack_common rootbacktrack;
12024 compiler_common common_data;
12025 compiler_common *common = &common_data;
12026 const sljit_u8 *tables = re->tables;
12027 void *allocator_data = &re->memctl;
12028 int private_data_size;
12029 PCRE2_SPTR ccend;
12030 executable_functions *functions;
12031 void *executable_func;
12032 sljit_uw executable_size;
12033 sljit_uw total_length;
12034 label_addr_list *label_addr;
12035 struct sljit_label *mainloop_label = NULL;
12036 struct sljit_label *continue_match_label;
12037 struct sljit_label *empty_match_found_label = NULL;
12038 struct sljit_label *empty_match_backtrack_label = NULL;
12039 struct sljit_label *reset_match_label;
12040 struct sljit_label *quit_label;
12041 struct sljit_jump *jump;
12042 struct sljit_jump *minlength_check_failed = NULL;
12043 struct sljit_jump *reqbyte_notfound = NULL;
12044 struct sljit_jump *empty_match = NULL;
12045 struct sljit_jump *end_anchor_failed = NULL;
12046 
12047 SLJIT_ASSERT(tables);
12048 
12049 memset(&rootbacktrack, 0, sizeof(backtrack_common));
12050 memset(common, 0, sizeof(compiler_common));
12051 common->re = re;
12052 common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
12053 rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
12054 
12055 common->start = rootbacktrack.cc;
12056 common->read_only_data_head = NULL;
12057 common->fcc = tables + fcc_offset;
12058 common->lcc = (sljit_sw)(tables + lcc_offset);
12059 common->mode = mode;
12060 common->might_be_empty = re->minlength == 0;
12061 common->nltype = NLTYPE_FIXED;
12062 switch(re->newline_convention)
12063   {
12064   case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
12065   case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
12066   case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
12067   case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
12068   case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
12069   default: return PCRE2_ERROR_INTERNAL;
12070   }
12071 common->nlmax = READ_CHAR_MAX;
12072 common->nlmin = 0;
12073 if (re->bsr_convention == PCRE2_BSR_UNICODE)
12074   common->bsr_nltype = NLTYPE_ANY;
12075 else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
12076   common->bsr_nltype = NLTYPE_ANYCRLF;
12077 else
12078   {
12079 #ifdef BSR_ANYCRLF
12080   common->bsr_nltype = NLTYPE_ANYCRLF;
12081 #else
12082   common->bsr_nltype = NLTYPE_ANY;
12083 #endif
12084   }
12085 common->bsr_nlmax = READ_CHAR_MAX;
12086 common->bsr_nlmin = 0;
12087 common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
12088 common->ctypes = (sljit_sw)(tables + ctypes_offset);
12089 common->name_count = re->name_count;
12090 common->name_entry_size = re->name_entry_size;
12091 common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
12092 common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
12093 #ifdef SUPPORT_UNICODE
12094 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
12095 common->utf = (re->overall_options & PCRE2_UTF) != 0;
12096 common->use_ucp = (re->overall_options & PCRE2_UCP) != 0;
12097 if (common->utf)
12098   {
12099   if (common->nltype == NLTYPE_ANY)
12100     common->nlmax = 0x2029;
12101   else if (common->nltype == NLTYPE_ANYCRLF)
12102     common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
12103   else
12104     {
12105     /* We only care about the first newline character. */
12106     common->nlmax = common->newline & 0xff;
12107     }
12108 
12109   if (common->nltype == NLTYPE_FIXED)
12110     common->nlmin = common->newline & 0xff;
12111   else
12112     common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
12113 
12114   if (common->bsr_nltype == NLTYPE_ANY)
12115     common->bsr_nlmax = 0x2029;
12116   else
12117     common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
12118   common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
12119   }
12120 #endif /* SUPPORT_UNICODE */
12121 ccend = bracketend(common->start);
12122 
12123 /* Calculate the local space size on the stack. */
12124 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
12125 common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
12126 if (!common->optimized_cbracket)
12127   return PCRE2_ERROR_NOMEMORY;
12128 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
12129 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
12130 #else
12131 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
12132 #endif
12133 
12134 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
12135 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
12136 common->capture_last_ptr = common->ovector_start;
12137 common->ovector_start += sizeof(sljit_sw);
12138 #endif
12139 if (!check_opcode_types(common, common->start, ccend))
12140   {
12141   SLJIT_FREE(common->optimized_cbracket, allocator_data);
12142   return PCRE2_ERROR_NOMEMORY;
12143   }
12144 
12145 /* Checking flags and updating ovector_start. */
12146 if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
12147   {
12148   common->req_char_ptr = common->ovector_start;
12149   common->ovector_start += sizeof(sljit_sw);
12150   }
12151 if (mode != PCRE2_JIT_COMPLETE)
12152   {
12153   common->start_used_ptr = common->ovector_start;
12154   common->ovector_start += sizeof(sljit_sw);
12155   if (mode == PCRE2_JIT_PARTIAL_SOFT)
12156     {
12157     common->hit_start = common->ovector_start;
12158     common->ovector_start += sizeof(sljit_sw);
12159     }
12160   }
12161 if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
12162   {
12163   common->match_end_ptr = common->ovector_start;
12164   common->ovector_start += sizeof(sljit_sw);
12165   }
12166 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
12167 common->control_head_ptr = 1;
12168 #endif
12169 if (common->control_head_ptr != 0)
12170   {
12171   common->control_head_ptr = common->ovector_start;
12172   common->ovector_start += sizeof(sljit_sw);
12173   }
12174 if (common->has_set_som)
12175   {
12176   /* Saving the real start pointer is necessary. */
12177   common->start_ptr = common->ovector_start;
12178   common->ovector_start += sizeof(sljit_sw);
12179   }
12180 
12181 /* Aligning ovector to even number of sljit words. */
12182 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
12183   common->ovector_start += sizeof(sljit_sw);
12184 
12185 if (common->start_ptr == 0)
12186   common->start_ptr = OVECTOR(0);
12187 
12188 /* Capturing brackets cannot be optimized if callouts are allowed. */
12189 if (common->capture_last_ptr != 0)
12190   memset(common->optimized_cbracket, 0, re->top_bracket + 1);
12191 
12192 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
12193 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
12194 
12195 total_length = ccend - common->start;
12196 common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
12197 if (!common->private_data_ptrs)
12198   {
12199   SLJIT_FREE(common->optimized_cbracket, allocator_data);
12200   return PCRE2_ERROR_NOMEMORY;
12201   }
12202 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
12203 
12204 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
12205 set_private_data_ptrs(common, &private_data_size, ccend);
12206 if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
12207   {
12208   if (!detect_fast_forward_skip(common, &private_data_size) && !common->has_skip_in_assert_back)
12209     detect_fast_fail(common, common->start, &private_data_size, 4);
12210   }
12211 
12212 SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr);
12213 
12214 if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
12215   {
12216   SLJIT_FREE(common->private_data_ptrs, allocator_data);
12217   SLJIT_FREE(common->optimized_cbracket, allocator_data);
12218   return PCRE2_ERROR_NOMEMORY;
12219   }
12220 
12221 if (common->has_then)
12222   {
12223   common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
12224   memset(common->then_offsets, 0, total_length);
12225   set_then_offsets(common, common->start, NULL);
12226   }
12227 
12228 compiler = sljit_create_compiler(allocator_data);
12229 if (!compiler)
12230   {
12231   SLJIT_FREE(common->optimized_cbracket, allocator_data);
12232   SLJIT_FREE(common->private_data_ptrs, allocator_data);
12233   return PCRE2_ERROR_NOMEMORY;
12234   }
12235 common->compiler = compiler;
12236 
12237 /* Main pcre_jit_exec entry. */
12238 sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size);
12239 
12240 /* Register init. */
12241 reset_ovector(common, (re->top_bracket + 1) * 2);
12242 if (common->req_char_ptr != 0)
12243   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
12244 
12245 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
12246 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
12247 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
12248 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
12249 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
12250 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
12251 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
12252 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
12253 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
12254 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
12255 
12256 if (common->fast_fail_start_ptr < common->fast_fail_end_ptr)
12257   reset_fast_fail(common);
12258 
12259 if (mode == PCRE2_JIT_PARTIAL_SOFT)
12260   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
12261 if (common->mark_ptr != 0)
12262   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
12263 if (common->control_head_ptr != 0)
12264   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
12265 
12266 /* Main part of the matching */
12267 if ((re->overall_options & PCRE2_ANCHORED) == 0)
12268   {
12269   mainloop_label = mainloop_entry(common);
12270   continue_match_label = LABEL();
12271   /* Forward search if possible. */
12272   if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
12273     {
12274     if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
12275       ;
12276     else if ((re->flags & PCRE2_FIRSTSET) != 0)
12277       fast_forward_first_char(common);
12278     else if ((re->flags & PCRE2_STARTLINE) != 0)
12279       fast_forward_newline(common);
12280     else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
12281       fast_forward_start_bits(common);
12282     }
12283   }
12284 else
12285   continue_match_label = LABEL();
12286 
12287 if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
12288   {
12289   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
12290   OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
12291   minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
12292   }
12293 if (common->req_char_ptr != 0)
12294   reqbyte_notfound = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
12295 
12296 /* Store the current STR_PTR in OVECTOR(0). */
12297 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
12298 /* Copy the limit of allowed recursions. */
12299 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
12300 if (common->capture_last_ptr != 0)
12301   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
12302 if (common->fast_forward_bc_ptr != NULL)
12303   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1), STR_PTR, 0);
12304 
12305 if (common->start_ptr != OVECTOR(0))
12306   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
12307 
12308 /* Copy the beginning of the string. */
12309 if (mode == PCRE2_JIT_PARTIAL_SOFT)
12310   {
12311   jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
12312   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
12313   JUMPHERE(jump);
12314   }
12315 else if (mode == PCRE2_JIT_PARTIAL_HARD)
12316   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
12317 
12318 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
12319 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
12320   {
12321   sljit_free_compiler(compiler);
12322   SLJIT_FREE(common->optimized_cbracket, allocator_data);
12323   SLJIT_FREE(common->private_data_ptrs, allocator_data);
12324   PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);
12325   return PCRE2_ERROR_NOMEMORY;
12326   }
12327 
12328 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
12329   end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
12330 
12331 if (common->might_be_empty)
12332   {
12333   empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
12334   empty_match_found_label = LABEL();
12335   }
12336 
12337 common->accept_label = LABEL();
12338 if (common->accept != NULL)
12339   set_jumps(common->accept, common->accept_label);
12340 
12341 /* This means we have a match. Update the ovector. */
12342 copy_ovector(common, re->top_bracket + 1);
12343 common->quit_label = common->abort_label = LABEL();
12344 if (common->quit != NULL)
12345   set_jumps(common->quit, common->quit_label);
12346 if (common->abort != NULL)
12347   set_jumps(common->abort, common->abort_label);
12348 if (minlength_check_failed != NULL)
12349   SET_LABEL(minlength_check_failed, common->abort_label);
12350 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
12351 
12352 if (common->failed_match != NULL)
12353   {
12354   SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
12355   set_jumps(common->failed_match, LABEL());
12356   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
12357   JUMPTO(SLJIT_JUMP, common->abort_label);
12358   }
12359 
12360 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
12361   JUMPHERE(end_anchor_failed);
12362 
12363 if (mode != PCRE2_JIT_COMPLETE)
12364   {
12365   common->partialmatchlabel = LABEL();
12366   set_jumps(common->partialmatch, common->partialmatchlabel);
12367   return_with_partial_match(common, common->quit_label);
12368   }
12369 
12370 if (common->might_be_empty)
12371   empty_match_backtrack_label = LABEL();
12372 compile_backtrackingpath(common, rootbacktrack.top);
12373 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
12374   {
12375   sljit_free_compiler(compiler);
12376   SLJIT_FREE(common->optimized_cbracket, allocator_data);
12377   SLJIT_FREE(common->private_data_ptrs, allocator_data);
12378   PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);
12379   return PCRE2_ERROR_NOMEMORY;
12380   }
12381 
12382 SLJIT_ASSERT(rootbacktrack.prev == NULL);
12383 reset_match_label = LABEL();
12384 
12385 if (mode == PCRE2_JIT_PARTIAL_SOFT)
12386   {
12387   /* Update hit_start only in the first time. */
12388   jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
12389   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
12390   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
12391   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
12392   JUMPHERE(jump);
12393   }
12394 
12395 /* Check we have remaining characters. */
12396 if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
12397   {
12398   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
12399   }
12400 
12401 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
12402     (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1)) : common->start_ptr);
12403 
12404 if ((re->overall_options & PCRE2_ANCHORED) == 0)
12405   {
12406   if (common->ff_newline_shortcut != NULL)
12407     {
12408     /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
12409     if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
12410       {
12411       if (common->match_end_ptr != 0)
12412         {
12413         OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
12414         OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
12415         CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
12416         OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
12417         }
12418       else
12419         CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
12420       }
12421     }
12422   else
12423     CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
12424   }
12425 
12426 /* No more remaining characters. */
12427 if (reqbyte_notfound != NULL)
12428   JUMPHERE(reqbyte_notfound);
12429 
12430 if (mode == PCRE2_JIT_PARTIAL_SOFT)
12431   CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
12432 
12433 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
12434 JUMPTO(SLJIT_JUMP, common->quit_label);
12435 
12436 flush_stubs(common);
12437 
12438 if (common->might_be_empty)
12439   {
12440   JUMPHERE(empty_match);
12441   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12442   OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
12443   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
12444   JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
12445   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
12446   JUMPTO(SLJIT_ZERO, empty_match_found_label);
12447   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
12448   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
12449   JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
12450   }
12451 
12452 common->fast_forward_bc_ptr = NULL;
12453 common->fast_fail_start_ptr = 0;
12454 common->fast_fail_end_ptr = 0;
12455 common->currententry = common->entries;
12456 common->local_quit_available = TRUE;
12457 quit_label = common->quit_label;
12458 while (common->currententry != NULL)
12459   {
12460   /* Might add new entries. */
12461   compile_recurse(common);
12462   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
12463     {
12464     sljit_free_compiler(compiler);
12465     SLJIT_FREE(common->optimized_cbracket, allocator_data);
12466     SLJIT_FREE(common->private_data_ptrs, allocator_data);
12467     PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);
12468     return PCRE2_ERROR_NOMEMORY;
12469     }
12470   flush_stubs(common);
12471   common->currententry = common->currententry->next;
12472   }
12473 common->local_quit_available = FALSE;
12474 common->quit_label = quit_label;
12475 
12476 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
12477 /* This is a (really) rare case. */
12478 set_jumps(common->stackalloc, LABEL());
12479 /* RETURN_ADDR is not a saved register. */
12480 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
12481 
12482 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
12483 
12484 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
12485 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
12486 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
12487 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
12488 OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
12489 
12490 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
12491 
12492 jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
12493 OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
12494 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
12495 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
12496 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
12497 sljit_emit_fast_return(compiler, TMP1, 0);
12498 
12499 /* Allocation failed. */
12500 JUMPHERE(jump);
12501 /* We break the return address cache here, but this is a really rare case. */
12502 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
12503 JUMPTO(SLJIT_JUMP, common->quit_label);
12504 
12505 /* Call limit reached. */
12506 set_jumps(common->calllimit, LABEL());
12507 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
12508 JUMPTO(SLJIT_JUMP, common->quit_label);
12509 
12510 if (common->revertframes != NULL)
12511   {
12512   set_jumps(common->revertframes, LABEL());
12513   do_revertframes(common);
12514   }
12515 if (common->wordboundary != NULL)
12516   {
12517   set_jumps(common->wordboundary, LABEL());
12518   check_wordboundary(common);
12519   }
12520 if (common->anynewline != NULL)
12521   {
12522   set_jumps(common->anynewline, LABEL());
12523   check_anynewline(common);
12524   }
12525 if (common->hspace != NULL)
12526   {
12527   set_jumps(common->hspace, LABEL());
12528   check_hspace(common);
12529   }
12530 if (common->vspace != NULL)
12531   {
12532   set_jumps(common->vspace, LABEL());
12533   check_vspace(common);
12534   }
12535 if (common->casefulcmp != NULL)
12536   {
12537   set_jumps(common->casefulcmp, LABEL());
12538   do_casefulcmp(common);
12539   }
12540 if (common->caselesscmp != NULL)
12541   {
12542   set_jumps(common->caselesscmp, LABEL());
12543   do_caselesscmp(common);
12544   }
12545 if (common->reset_match != NULL)
12546   {
12547   set_jumps(common->reset_match, LABEL());
12548   do_reset_match(common, (re->top_bracket + 1) * 2);
12549   CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
12550   OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
12551   JUMPTO(SLJIT_JUMP, reset_match_label);
12552   }
12553 #ifdef SUPPORT_UNICODE
12554 #if PCRE2_CODE_UNIT_WIDTH == 8
12555 if (common->utfreadchar != NULL)
12556   {
12557   set_jumps(common->utfreadchar, LABEL());
12558   do_utfreadchar(common);
12559   }
12560 if (common->utfreadchar16 != NULL)
12561   {
12562   set_jumps(common->utfreadchar16, LABEL());
12563   do_utfreadchar16(common);
12564   }
12565 if (common->utfreadtype8 != NULL)
12566   {
12567   set_jumps(common->utfreadtype8, LABEL());
12568   do_utfreadtype8(common);
12569   }
12570 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
12571 if (common->getucd != NULL)
12572   {
12573   set_jumps(common->getucd, LABEL());
12574   do_getucd(common);
12575   }
12576 #endif /* SUPPORT_UNICODE */
12577 
12578 SLJIT_FREE(common->optimized_cbracket, allocator_data);
12579 SLJIT_FREE(common->private_data_ptrs, allocator_data);
12580 
12581 executable_func = sljit_generate_code(compiler);
12582 executable_size = sljit_get_generated_code_size(compiler);
12583 label_addr = common->label_addrs;
12584 while (label_addr != NULL)
12585   {
12586   *label_addr->update_addr = sljit_get_label_addr(label_addr->label);
12587   label_addr = label_addr->next;
12588   }
12589 sljit_free_compiler(compiler);
12590 if (executable_func == NULL)
12591   {
12592   PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);
12593   return PCRE2_ERROR_NOMEMORY;
12594   }
12595 
12596 /* Reuse the function descriptor if possible. */
12597 if (re->executable_jit != NULL)
12598   functions = (executable_functions *)re->executable_jit;
12599 else
12600   {
12601   functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
12602   if (functions == NULL)
12603     {
12604     /* This case is highly unlikely since we just recently
12605     freed a lot of memory. Not impossible though. */
12606     sljit_free_code(executable_func);
12607     PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);
12608     return PCRE2_ERROR_NOMEMORY;
12609     }
12610   memset(functions, 0, sizeof(executable_functions));
12611   functions->top_bracket = re->top_bracket + 1;
12612   functions->limit_match = re->limit_match;
12613   re->executable_jit = functions;
12614   }
12615 
12616 /* Turn mode into an index. */
12617 if (mode == PCRE2_JIT_COMPLETE)
12618   mode = 0;
12619 else
12620   mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
12621 
12622 SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
12623 functions->executable_funcs[mode] = executable_func;
12624 functions->read_only_data_heads[mode] = common->read_only_data_head;
12625 functions->executable_sizes[mode] = executable_size;
12626 return 0;
12627 }
12628 
12629 #endif
12630 
12631 /*************************************************
12632 *        JIT compile a Regular Expression        *
12633 *************************************************/
12634 
12635 /* This function used JIT to convert a previously-compiled pattern into machine
12636 code.
12637 
12638 Arguments:
12639   code          a compiled pattern
12640   options       JIT option bits
12641 
12642 Returns:        0: success or (*NOJIT) was used
12643                <0: an error code
12644 */
12645 
12646 #define PUBLIC_JIT_COMPILE_OPTIONS \
12647   (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD)
12648 
12649 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_compile(pcre2_code * code,uint32_t options)12650 pcre2_jit_compile(pcre2_code *code, uint32_t options)
12651 {
12652 #ifndef SUPPORT_JIT
12653 
12654 (void)code;
12655 (void)options;
12656 return PCRE2_ERROR_JIT_BADOPTION;
12657 
12658 #else  /* SUPPORT_JIT */
12659 
12660 pcre2_real_code *re = (pcre2_real_code *)code;
12661 executable_functions *functions;
12662 int result;
12663 
12664 if (code == NULL)
12665   return PCRE2_ERROR_NULL;
12666 
12667 if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
12668   return PCRE2_ERROR_JIT_BADOPTION;
12669 
12670 if ((re->flags & PCRE2_NOJIT) != 0) return 0;
12671 
12672 functions = (executable_functions *)re->executable_jit;
12673 
12674 if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
12675     || functions->executable_funcs[0] == NULL)) {
12676   result = jit_compile(code, PCRE2_JIT_COMPLETE);
12677   if (result != 0)
12678     return result;
12679   }
12680 
12681 if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
12682     || functions->executable_funcs[1] == NULL)) {
12683   result = jit_compile(code, PCRE2_JIT_PARTIAL_SOFT);
12684   if (result != 0)
12685     return result;
12686   }
12687 
12688 if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
12689     || functions->executable_funcs[2] == NULL)) {
12690   result = jit_compile(code, PCRE2_JIT_PARTIAL_HARD);
12691   if (result != 0)
12692     return result;
12693   }
12694 
12695 return 0;
12696 
12697 #endif  /* SUPPORT_JIT */
12698 }
12699 
12700 /* JIT compiler uses an all-in-one approach. This improves security,
12701    since the code generator functions are not exported. */
12702 
12703 #define INCLUDED_FROM_PCRE2_JIT_COMPILE
12704 
12705 #include "pcre2_jit_match.c"
12706 #include "pcre2_jit_misc.c"
12707 
12708 /* End of pcre2_jit_compile.c */
12709