• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10          New API code Copyright (c) 2016 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 #ifdef HAVE_CONFIG_H
42 #include "config.h"
43 #endif
44 
45 #include "pcre2_internal.h"
46 
47 #ifdef SUPPORT_JIT
48 
49 /* All-in-one: Since we use the JIT compiler only from here,
50 we just include it. This way we don't need to touch the build
51 system files. */
52 
53 #define SLJIT_CONFIG_AUTO 1
54 #define SLJIT_CONFIG_STATIC 1
55 #define SLJIT_VERBOSE 0
56 
57 #ifdef PCRE2_DEBUG
58 #define SLJIT_DEBUG 1
59 #else
60 #define SLJIT_DEBUG 0
61 #endif
62 
63 #define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
64 #define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
65 
pcre2_jit_malloc(size_t size,void * allocator_data)66 static void * pcre2_jit_malloc(size_t size, void *allocator_data)
67 {
68 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
69 return allocator->malloc(size, allocator->memory_data);
70 }
71 
pcre2_jit_free(void * ptr,void * allocator_data)72 static void pcre2_jit_free(void *ptr, void *allocator_data)
73 {
74 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
75 allocator->free(ptr, allocator->memory_data);
76 }
77 
78 #include "sljit/sljitLir.c"
79 
80 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
81 #error Unsupported architecture
82 #endif
83 
84 /* Defines for debugging purposes. */
85 
86 /* 1 - Use unoptimized capturing brackets.
87    2 - Enable capture_last_ptr (includes option 1). */
88 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
89 
90 /* 1 - Always have a control head. */
91 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
92 
93 /* Allocate memory for the regex stack on the real machine stack.
94 Fast, but limited size. */
95 #define MACHINE_STACK_SIZE 32768
96 
/* Growth rate for stack allocated by the OS. Should be a multiple
of the page size. */
99 #define STACK_GROWTH_RATE 8192
100 
101 /* Enable to check that the allocation could destroy temporaries. */
102 #if defined SLJIT_DEBUG && SLJIT_DEBUG
103 #define DESTROY_REGISTERS 1
104 #endif
105 
106 /*
Short summary about the backtracking mechanism employed by the jit code generator:
108 
The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
114 
115   'ab' - 'a' and 'b' regexps are concatenated
116   'a+' - 'a' is the sub-expression of the '+' operator
117 
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
123 
124  Greedy star operator (*) :
125    Matching path: match happens.
126    Backtrack path: match failed.
127  Non-greedy star operator (*?) :
128    Matching path: no need to perform a match.
129    Backtrack path: match is required.
130 
The following example shows the code generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
suppose we have the following regular expression:
134 
135    A(B|C)D
136 
137 The generated code will be the following:
138 
139  A matching path
140  '(' matching path (pushing arguments to the stack)
141  B matching path
142  ')' matching path (pushing arguments to the stack)
143  D matching path
144  return with successful match
145 
146  D backtrack path
147  ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
148  B backtrack path
149  C expected path
150  jump to D matching path
151  C backtrack path
152  A backtrack path
153 
 Notice that the order of the backtrack code paths is the opposite of the fast
 code paths. In this way the topmost value on the stack always belongs
 to the current backtrack code path. The backtrack path must check
 whether there is a next alternative. If so, it needs to jump back to
 the matching path eventually. Otherwise it needs to clear out its own stack
 frame and continue the execution on the backtrack code paths.
160 */
161 
162 /*
163 Saved stack frames:
164 
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
169 
170 The stack frames are stored in a chain list, and have the following format:
171 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
172 
173 Thus we can restore the private data to a particular point in the stack.
174 */
175 
/* Argument block for the generated code: the jit_function type declared
later in this file takes a pointer to one of these as its only parameter.
Members are grouped as pointers first, integer values afterwards. */
typedef struct jit_arguments {
  /* Pointers first. */
  struct sljit_stack *stack;
  PCRE2_SPTR str;
  PCRE2_SPTR begin;
  PCRE2_SPTR end;
  pcre2_match_data *match_data;
  PCRE2_SPTR startchar_ptr;
  PCRE2_UCHAR *mark_ptr;
  /* Callout function pointer and the opaque data pointer kept with it
  (NOTE(review): presumably passed as the second callout argument; confirm
  at the call site). */
  int (*callout)(pcre2_callout_block *, void *);
  void *callout_data;
  /* Everything else after. */
  sljit_uw offset_limit;
  sljit_u32 limit_match;
  sljit_u32 oveccount;
  sljit_u32 options;
} jit_arguments;
193 
/* Number of slots in each per-mode array of executable_functions. */
#define JIT_NUMBER_OF_COMPILE_MODES 3

/* Per-compile-mode bookkeeping: the three arrays are parallel and have one
slot per compile mode; top_bracket and limit_match are single values shared
by all modes. */
typedef struct executable_functions {
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
  void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_u32 top_bracket;
  sljit_u32 limit_match;
} executable_functions;
203 
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
208 
/* Node of a singly linked list of stubs; each node pairs a jump (start)
with a label (quit). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
214 
/* Node of a singly linked list pairing a label with the location of a word
(update_addr). NOTE(review): presumably the word is patched with the label's
final address after code generation; confirm where the list is consumed. */
typedef struct label_addr_list {
  struct sljit_label *label;
  sljit_uw *update_addr;
  struct label_addr_list *next;
} label_addr_list;
220 
/* Negative sentinel values for frame sizes (several *_backtrack structures
document their framesize member as "less than 0 if not needed"). */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
225 
/* Tags for control entries. NOTE(review): presumably the entry kinds stored
on the control verb chain (see control_head_ptr); confirm against users. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
230 
/* Signature of a function taking a jit_arguments pointer and returning int,
using the SLJIT_CALL calling convention. */
typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
232 
/* The following structure is the key data type for the recursive
code generator. It is allocated by compile_matchingpath, and contains
the arguments for compile_backtrackingpath. Must be the first member
of its descendants (each *_backtrack structure in this file starts with
a member of this type named "common"). */
typedef struct backtrack_common {
  /* Concatenation stack. */
  struct backtrack_common *prev;
  jump_list *nextbacktracks;
  /* Internal stack (for component operators). */
  struct backtrack_common *top;
  jump_list *topbacktracks;
  /* Opcode pointer. */
  PCRE2_SPTR cc;
} backtrack_common;
247 
/* Backtrack record extending backtrack_common.
NOTE(review): by its name, used for assertions; confirm against users. */
typedef struct assert_backtrack {
  backtrack_common common;
  jump_list *condfailed;
  /* Less than 0 if a frame is not needed. */
  int framesize;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* For iterators. */
  struct sljit_label *matchingpath;
} assert_backtrack;
258 
/* Backtrack record extending backtrack_common.
NOTE(review): by its name, used for bracketed groups; confirm against users.
The member of the union u that is valid depends on the opcode, as noted on
each member. */
typedef struct bracket_backtrack {
  backtrack_common common;
  /* Where to continue if an alternative is successfully matched. */
  struct sljit_label *alternative_matchingpath;
  /* For rmin and rmax iterators. */
  struct sljit_label *recursive_matchingpath;
  /* For greedy ? operator. */
  struct sljit_label *zero_matchingpath;
  /* Contains the branches of a failed condition. */
  union {
    /* Both for OP_COND, OP_SCOND. */
    jump_list *condfailed;
    assert_backtrack *assert;
    /* For OP_ONCE. Less than 0 if not needed. */
    int framesize;
  } u;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
} bracket_backtrack;
278 
/* Backtrack record extending backtrack_common.
NOTE(review): by its name, used for possessive brackets (OP_*BRAPOS);
confirm against users. */
typedef struct bracketpos_backtrack {
  backtrack_common common;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* Reverting stack is needed. */
  int framesize;
  /* Allocated stack size. */
  int stacksize;
} bracketpos_backtrack;
288 
/* Backtrack record extending backtrack_common with one matching path label.
NOTE(review): by its name, used for OP_BRAMINZERO; confirm against users. */
typedef struct braminzero_backtrack {
  backtrack_common common;
  struct sljit_label *matchingpath;
} braminzero_backtrack;
293 
/* Backtrack record extending backtrack_common.
NOTE(review): by its name, used for character iterators; confirm against
users. The union u holds either a jump list or the charpos data; which one
is active is decided by the code using the record. */
typedef struct char_iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
  union {
    jump_list *backtracks;
    struct {
      unsigned int othercasebit;
      PCRE2_UCHAR chr;
      BOOL enabled;
    } charpos;
  } u;
} char_iterator_backtrack;
307 
/* Backtrack record extending backtrack_common with one matching path label.
NOTE(review): by its name, used for back reference iterators; confirm
against users. */
typedef struct ref_iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
} ref_iterator_backtrack;
313 
/* Node of a singly linked list; each node describes one function entry
together with the calls that target it. */
typedef struct recurse_entry {
  struct recurse_entry *next;
  /* Contains the function entry. */
  struct sljit_label *entry;
  /* Collects the calls until the function is created. */
  jump_list *calls;
  /* Points to the starting opcode. */
  sljit_sw start;
} recurse_entry;
323 
/* Backtrack record extending backtrack_common with a single flag.
NOTE(review): inlined_pattern presumably tells whether the recursed pattern
was compiled inline instead of being called; confirm against users. */
typedef struct recurse_backtrack {
  backtrack_common common;
  BOOL inlined_pattern;
} recurse_backtrack;
328 
/* Extra opcode value, set to OP_TABLE_LENGTH.
NOTE(review): presumably one past the last real opcode so that it cannot
collide with an existing one; confirm against the opcode table. */
#define OP_THEN_TRAP OP_TABLE_LENGTH

/* Backtrack record extending backtrack_common for then traps. */
typedef struct then_trap_backtrack {
  backtrack_common common;
  /* If then_trap is not NULL, this structure contains the real
  then_trap for the backtracking path. */
  struct then_trap_backtrack *then_trap;
  /* Points to the starting opcode. */
  sljit_sw start;
  /* Exit point for the then opcodes of this alternative. */
  jump_list *quit;
  /* Frame size of the current alternative. */
  int framesize;
} then_trap_backtrack;
343 
#define MAX_RANGE_SIZE 4

/* State shared by the code generator functions; they receive it through
a "common" pointer. Members whose names end in _ptr and have type sljit_s32
are offsets, not C pointers (check_opcode_types assigns them from
ovector_start). */
typedef struct compiler_common {
  /* The sljit generic compiler. */
  struct sljit_compiler *compiler;
  /* First byte code. */
  PCRE2_SPTR start;
  /* Maps private data offset to each opcode. */
  sljit_s32 *private_data_ptrs;
  /* Chain list of read-only data ptrs. */
  void *read_only_data_head;
  /* Tells whether the capturing bracket is optimized. */
  sljit_u8 *optimized_cbracket;
  /* Tells whether the starting offset is a target of then. */
  sljit_u8 *then_offsets;
  /* Current position where a THEN must jump. */
  then_trap_backtrack *then_trap;
  /* Starting offset of private data for capturing brackets. */
  sljit_s32 cbra_ptr;
  /* Output vector starting point. Must be divisible by 2. */
  sljit_s32 ovector_start;
  /* Points to the starting character of the current match. */
  sljit_s32 start_ptr;
  /* Last known position of the requested byte. */
  sljit_s32 req_char_ptr;
  /* Head of the last recursion. */
  sljit_s32 recursive_head_ptr;
  /* First inspected character for partial matching.
     (Needed for avoiding zero length partial matches.) */
  sljit_s32 start_used_ptr;
  /* Starting pointer for partial soft matches. */
  sljit_s32 hit_start;
  /* Pointer of the match end position. */
  sljit_s32 match_end_ptr;
  /* Points to the marked string. */
  sljit_s32 mark_ptr;
  /* Recursive control verb management chain. */
  sljit_s32 control_head_ptr;
  /* Points to the last matched capture block index. */
  sljit_s32 capture_last_ptr;
  /* Fast forward skipping byte code pointer. */
  PCRE2_SPTR fast_forward_bc_ptr;
  /* Locals used by fast fail optimization. */
  sljit_s32 fast_fail_start_ptr;
  sljit_s32 fast_fail_end_ptr;

  /* Flipped and lower case tables. */
  const sljit_u8 *fcc;
  sljit_sw lcc;
  /* Mode can be PCRE2_JIT_COMPLETE and others. */
  int mode;
  /* TRUE, when minlength is greater than 0.
     NOTE(review): the member name suggests the opposite sense (the match
     may be empty), and check_opcode_types sets it to TRUE when it meets
     OP_SET_SOM; confirm against the place where it is initialised. */
  BOOL might_be_empty;
  /* \K is found in the pattern. */
  BOOL has_set_som;
  /* (*SKIP:arg) is found in the pattern. */
  BOOL has_skip_arg;
  /* (*THEN) is found in the pattern. */
  BOOL has_then;
  /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
  BOOL has_skip_in_assert_back;
  /* Currently in recurse or negative assert. */
  BOOL local_exit;
  /* Currently in a positive assert. */
  BOOL positive_assert;
  /* Newline control. */
  int nltype;
  sljit_u32 nlmax;
  sljit_u32 nlmin;
  int newline;
  int bsr_nltype;
  sljit_u32 bsr_nlmax;
  sljit_u32 bsr_nlmin;
  /* Dollar endonly. */
  int endonly;
  /* Tables. */
  sljit_sw ctypes;
  /* Named capturing brackets. */
  PCRE2_SPTR name_table;
  sljit_sw name_count;
  sljit_sw name_entry_size;

  /* Labels and jump lists. */
  struct sljit_label *partialmatchlabel;
  struct sljit_label *quit_label;
  struct sljit_label *forced_quit_label;
  struct sljit_label *accept_label;
  struct sljit_label *ff_newline_shortcut;
  stub_list *stubs;
  label_addr_list *label_addrs;
  recurse_entry *entries;
  recurse_entry *currententry;
  jump_list *partialmatch;
  jump_list *quit;
  jump_list *positive_assert_quit;
  jump_list *forced_quit;
  jump_list *accept;
  jump_list *calllimit;
  jump_list *stackalloc;
  jump_list *revertframes;
  jump_list *wordboundary;
  jump_list *anynewline;
  jump_list *hspace;
  jump_list *vspace;
  jump_list *casefulcmp;
  jump_list *caselesscmp;
  jump_list *reset_match;
  BOOL unset_backref;
  BOOL alt_circumflex;
  /* The remaining members exist only in builds with Unicode support; the
     utfreadchar* lists additionally require 8-bit code units. */
#ifdef SUPPORT_UNICODE
  BOOL utf;
  BOOL use_ucp;
  jump_list *getucd;
#if PCRE2_CODE_UNIT_WIDTH == 8
  jump_list *utfreadchar;
  jump_list *utfreadchar16;
  jump_list *utfreadtype8;
#endif
#endif /* SUPPORT_UNICODE */
} compiler_common;
464 
/* For byte_sequence_compare. */

/* The ucharptr member and the two unions are present only when
SLJIT_UNALIGNED is enabled. Each union overlays the same storage as a
32-bit int, a 16-bit value, and an array of code units whose element count
depends on PCRE2_CODE_UNIT_WIDTH (4, 2 or 1 elements, i.e. always 4 bytes).
NOTE(review): c / oc presumably hold the characters and their other-case
variants; confirm in byte_sequence_compare. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
498 
/* Undefine sljit macros. CMP is redefined further down in this file as a
shortcut for sljit_emit_cmp. */
#undef CMP

/* Used for accessing the elements of the stack. Yields a negative byte
offset: STACK(0) is -sizeof(sljit_sw), STACK(1) is -2 * sizeof(sljit_sw),
and so on. */
#define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_sw))

/* Symbolic names for the sljit registers used by the generated code. */
#define TMP1          SLJIT_R0
#define TMP2          SLJIT_R2
#define TMP3          SLJIT_R3
#define STR_PTR       SLJIT_S0
#define STR_END       SLJIT_S1
#define STACK_TOP     SLJIT_R1
#define STACK_LIMIT   SLJIT_S2
#define COUNT_MATCH   SLJIT_S3
#define ARGUMENTS     SLJIT_S4
#define RETURN_ADDR   SLJIT_R4

/* Local space layout. Each macro below is a byte offset of one
sljit_sw-sized slot. */
/* These two locals can be used by the current opcode. */
#define LOCALS0          (0 * sizeof(sljit_sw))
#define LOCALS1          (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_sw))
#define POSSESSIVE1      (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH      (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet reached. */
/* The four macros below expand to expressions that use a variable named
"common" (a compiler_common pointer), which must be in scope at the point
of use. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
533 
/* Code unit width dependent helpers: MOV_UCHAR / MOVU_UCHAR select the sljit
move opcode of matching width, and IN_UCHARS(x) scales a code unit count by
the code unit size in bytes (1, 2 or 4). UCHAR_SHIFT is defined only for
16-bit and 32-bit code units. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define MOV_UCHAR  SLJIT_MOV_U8
#define MOVU_UCHAR SLJIT_MOVU_U8
#define IN_UCHARS(x) (x)
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define MOV_UCHAR  SLJIT_MOV_U16
#define MOVU_UCHAR SLJIT_MOVU_U16
#define UCHAR_SHIFT (1)
#define IN_UCHARS(x) ((x) * 2)
#elif PCRE2_CODE_UNIT_WIDTH == 32
#define MOV_UCHAR  SLJIT_MOV_U32
#define MOVU_UCHAR SLJIT_MOVU_U32
#define UCHAR_SHIFT (2)
#define IN_UCHARS(x) ((x) * 4)
#else
#error Unsupported compiling mode
#endif
551 
/* Shortcuts. Every macro below except DEFINE_COMPILER and SET_LABEL expands
to an sljit call on a local variable named "compiler"; DEFINE_COMPILER
declares that variable from common->compiler, so it needs "common" to be in
scope. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an already existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Emits a new label at the current position and binds the jump to it. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare-and-jump and binds it to an already existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))

#define READ_CHAR_MAX 0x7fffffff
579 
/* Returns the position just after the closing KET of the group that starts
at cc. The group is walked alternative by alternative through the link
stored after each opcode. */

static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
PCRE2_SPTR pos = cc;

SLJIT_ASSERT((*pos >= OP_ASSERT && *pos <= OP_ASSERTBACK_NOT) || (*pos >= OP_ONCE && *pos <= OP_SCOND));

/* The first link is always followed; further ones only from an OP_ALT. */
pos += GET(pos, 1);
while (*pos == OP_ALT)
  pos += GET(pos, 1);

SLJIT_ASSERT(*pos >= OP_KET && *pos <= OP_KETRPOS);
return pos + 1 + LINK_SIZE;
}
588 
/* Returns the number of alternatives of the group that starts at cc
(always at least 1). */

static int no_alternatives(PCRE2_SPTR cc)
{
int total;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* Skip the first alternative, then count one more for every OP_ALT. */
cc += GET(cc, 1);
for (total = 1; *cc == OP_ALT; total++)
  cc += GET(cc, 1);

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return total;
}
602 
/* Functions which might need modification for every newly supported opcode:
604  next_opcode
605  check_opcode_types
606  set_private_data_ptrs
607  get_framesize
608  init_frame
609  get_private_data_copy_length
610  copy_private_data
611  compile_matchingpath
612  compile_backtrackingpath
613 */
614 
/* Returns the address of the opcode that follows the one at cc. Returns NULL
for OP_ANYBYTE when common->utf is set, and for any opcode that is not listed
in the switch below. */

static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
PCRE2_SPTR next;

SLJIT_UNUSED_ARG(common);

switch(*cc)
  {
  /* The length comes straight from the opcode length table. */
  case OP_SOD: case OP_SOM: case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT: case OP_DIGIT:
  case OP_NOT_WHITESPACE: case OP_WHITESPACE:
  case OP_NOT_WORDCHAR: case OP_WORDCHAR:
  case OP_ANY: case OP_ALLANY:
  case OP_NOTPROP: case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE: case OP_HSPACE:
  case OP_NOT_VSPACE: case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN: case OP_EOD:
  case OP_CIRC: case OP_CIRCM:
  case OP_DOLL: case OP_DOLLM:
  case OP_CRSTAR: case OP_CRMINSTAR:
  case OP_CRPLUS: case OP_CRMINPLUS:
  case OP_CRQUERY: case OP_CRMINQUERY:
  case OP_CRRANGE: case OP_CRMINRANGE:
  case OP_CRPOSSTAR: case OP_CRPOSPLUS:
  case OP_CRPOSQUERY: case OP_CRPOSRANGE:
  case OP_CLASS: case OP_NCLASS:
  case OP_REF: case OP_REFI:
  case OP_DNREF: case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS:
  case OP_REVERSE:
  case OP_ASSERT: case OP_ASSERT_NOT:
  case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
  case OP_ONCE: case OP_ONCE_NC:
  case OP_BRA: case OP_BRAPOS:
  case OP_CBRA: case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA: case OP_SBRAPOS:
  case OP_SCBRA: case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF: case OP_DNCREF:
  case OP_RREF: case OP_DNRREF:
  case OP_FALSE: case OP_TRUE:
  case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
  case OP_PRUNE: case OP_SKIP: case OP_THEN: case OP_COMMIT:
  case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  return cc + PRIV(OP_lengths)[*cc];

  /* Opcodes ending in a character: in UTF mode the last code unit may
  announce extra code units which have to be skipped as well. */
  case OP_CHAR: case OP_CHARI:
  case OP_NOT: case OP_NOTI:
  case OP_STAR: case OP_MINSTAR:
  case OP_PLUS: case OP_MINPLUS:
  case OP_QUERY: case OP_MINQUERY:
  case OP_UPTO: case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
  case OP_STARI: case OP_MINSTARI:
  case OP_PLUSI: case OP_MINPLUSI:
  case OP_QUERYI: case OP_MINQUERYI:
  case OP_UPTOI: case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
  case OP_NOTSTAR: case OP_NOTMINSTAR:
  case OP_NOTPLUS: case OP_NOTMINPLUS:
  case OP_NOTQUERY: case OP_NOTMINQUERY:
  case OP_NOTUPTO: case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR: case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
  case OP_NOTSTARI: case OP_NOTMINSTARI:
  case OP_NOTPLUSI: case OP_NOTMINPLUSI:
  case OP_NOTQUERYI: case OP_NOTMINQUERYI:
  case OP_NOTUPTOI: case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
  next = cc + PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(next[-1])) next += GET_EXTRALEN(next[-1]);
#endif
  return next;

  /* Special cases. The returned position is one code unit short of the
  table length. */
  case OP_TYPESTAR: case OP_TYPEMINSTAR:
  case OP_TYPEPLUS: case OP_TYPEMINPLUS:
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  case OP_ANYBYTE:
#ifdef SUPPORT_UNICODE
  if (common->utf) return NULL;
#endif
  return cc + 1;

  /* The total length is stored inside the opcode itself. */
  case OP_CALLOUT_STR:
  return cc + GET(cc, 1 + 2*LINK_SIZE);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* Verbs with an argument: cc[1] is added on top of three fixed units. */
  case OP_MARK: case OP_PRUNE_ARG: case OP_SKIP_ARG: case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  default:
  /* All opcodes are supported now! */
  SLJIT_ASSERT_STOP();
  return NULL;
  }
}
813 
check_opcode_types(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend)814 static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
815 {
816 int count;
817 PCRE2_SPTR slot;
818 PCRE2_SPTR assert_back_end = cc - 1;
819 
820 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
821 while (cc < ccend)
822   {
823   switch(*cc)
824     {
825     case OP_SET_SOM:
826     common->has_set_som = TRUE;
827     common->might_be_empty = TRUE;
828     cc += 1;
829     break;
830 
831     case OP_REF:
832     case OP_REFI:
833     common->optimized_cbracket[GET2(cc, 1)] = 0;
834     cc += 1 + IMM2_SIZE;
835     break;
836 
837     case OP_CBRAPOS:
838     case OP_SCBRAPOS:
839     common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
840     cc += 1 + LINK_SIZE + IMM2_SIZE;
841     break;
842 
843     case OP_COND:
844     case OP_SCOND:
845     /* Only AUTO_CALLOUT can insert this opcode. We do
846        not intend to support this case. */
847     if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
848       return FALSE;
849     cc += 1 + LINK_SIZE;
850     break;
851 
852     case OP_CREF:
853     common->optimized_cbracket[GET2(cc, 1)] = 0;
854     cc += 1 + IMM2_SIZE;
855     break;
856 
857     case OP_DNREF:
858     case OP_DNREFI:
859     case OP_DNCREF:
860     count = GET2(cc, 1 + IMM2_SIZE);
861     slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
862     while (count-- > 0)
863       {
864       common->optimized_cbracket[GET2(slot, 0)] = 0;
865       slot += common->name_entry_size;
866       }
867     cc += 1 + 2 * IMM2_SIZE;
868     break;
869 
870     case OP_RECURSE:
871     /* Set its value only once. */
872     if (common->recursive_head_ptr == 0)
873       {
874       common->recursive_head_ptr = common->ovector_start;
875       common->ovector_start += sizeof(sljit_sw);
876       }
877     cc += 1 + LINK_SIZE;
878     break;
879 
880     case OP_CALLOUT:
881     case OP_CALLOUT_STR:
882     if (common->capture_last_ptr == 0)
883       {
884       common->capture_last_ptr = common->ovector_start;
885       common->ovector_start += sizeof(sljit_sw);
886       }
887     cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
888     break;
889 
890     case OP_ASSERTBACK:
891     slot = bracketend(cc);
892     if (slot > assert_back_end)
893       assert_back_end = slot;
894     cc += 1 + LINK_SIZE;
895     break;
896 
897     case OP_THEN_ARG:
898     common->has_then = TRUE;
899     common->control_head_ptr = 1;
900     /* Fall through. */
901 
902     case OP_PRUNE_ARG:
903     case OP_MARK:
904     if (common->mark_ptr == 0)
905       {
906       common->mark_ptr = common->ovector_start;
907       common->ovector_start += sizeof(sljit_sw);
908       }
909     cc += 1 + 2 + cc[1];
910     break;
911 
912     case OP_THEN:
913     common->has_then = TRUE;
914     common->control_head_ptr = 1;
915     cc += 1;
916     break;
917 
918     case OP_SKIP:
919     if (cc < assert_back_end)
920       common->has_skip_in_assert_back = TRUE;
921     cc += 1;
922     break;
923 
924     case OP_SKIP_ARG:
925     common->control_head_ptr = 1;
926     common->has_skip_arg = TRUE;
927     if (cc < assert_back_end)
928       common->has_skip_in_assert_back = TRUE;
929     cc += 1 + 2 + cc[1];
930     break;
931 
932     default:
933     cc = next_opcode(common, cc);
934     if (cc == NULL)
935       return FALSE;
936     break;
937     }
938   }
939 return TRUE;
940 }
941 
is_accelerated_repeat(PCRE2_SPTR cc)942 static BOOL is_accelerated_repeat(PCRE2_SPTR cc)
943 {
944 switch(*cc)
945   {
946   case OP_TYPESTAR:
947   case OP_TYPEMINSTAR:
948   case OP_TYPEPLUS:
949   case OP_TYPEMINPLUS:
950   case OP_TYPEPOSSTAR:
951   case OP_TYPEPOSPLUS:
952   return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);
953 
954   case OP_STAR:
955   case OP_MINSTAR:
956   case OP_PLUS:
957   case OP_MINPLUS:
958   case OP_POSSTAR:
959   case OP_POSPLUS:
960 
961   case OP_STARI:
962   case OP_MINSTARI:
963   case OP_PLUSI:
964   case OP_MINPLUSI:
965   case OP_POSSTARI:
966   case OP_POSPLUSI:
967 
968   case OP_NOTSTAR:
969   case OP_NOTMINSTAR:
970   case OP_NOTPLUS:
971   case OP_NOTMINPLUS:
972   case OP_NOTPOSSTAR:
973   case OP_NOTPOSPLUS:
974 
975   case OP_NOTSTARI:
976   case OP_NOTMINSTARI:
977   case OP_NOTPLUSI:
978   case OP_NOTMINPLUSI:
979   case OP_NOTPOSSTARI:
980   case OP_NOTPOSPLUSI:
981   return TRUE;
982 
983   case OP_CLASS:
984   case OP_NCLASS:
985 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
986   case OP_XCLASS:
987   cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(PCRE2_UCHAR)));
988 #else
989   cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
990 #endif
991 
992   switch(*cc)
993     {
994     case OP_CRSTAR:
995     case OP_CRMINSTAR:
996     case OP_CRPLUS:
997     case OP_CRMINPLUS:
998     case OP_CRPOSSTAR:
999     case OP_CRPOSPLUS:
1000     return TRUE;
1001     }
1002   break;
1003   }
1004 return FALSE;
1005 }
1006 
detect_fast_forward_skip(compiler_common * common,int * private_data_start)1007 static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
1008 {
1009 PCRE2_SPTR cc = common->start;
1010 PCRE2_SPTR end;
1011 
1012 /* Skip not repeated brackets. */
1013 while (TRUE)
1014   {
1015   switch(*cc)
1016     {
1017     case OP_SOD:
1018     case OP_SOM:
1019     case OP_SET_SOM:
1020     case OP_NOT_WORD_BOUNDARY:
1021     case OP_WORD_BOUNDARY:
1022     case OP_EODN:
1023     case OP_EOD:
1024     case OP_CIRC:
1025     case OP_CIRCM:
1026     case OP_DOLL:
1027     case OP_DOLLM:
1028     /* Zero width assertions. */
1029     cc++;
1030     continue;
1031     }
1032 
1033   if (*cc != OP_BRA && *cc != OP_CBRA)
1034     break;
1035 
1036   end = cc + GET(cc, 1);
1037   if (*end != OP_KET || PRIVATE_DATA(end) != 0)
1038     return FALSE;
1039   if (*cc == OP_CBRA)
1040     {
1041     if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1042       return FALSE;
1043     cc += IMM2_SIZE;
1044     }
1045   cc += 1 + LINK_SIZE;
1046   }
1047 
1048 if (is_accelerated_repeat(cc))
1049   {
1050   common->fast_forward_bc_ptr = cc;
1051   common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1052   *private_data_start += sizeof(sljit_sw);
1053   return TRUE;
1054   }
1055 return FALSE;
1056 }
1057 
detect_fast_fail(compiler_common * common,PCRE2_SPTR cc,int * private_data_start,sljit_s32 depth)1058 static SLJIT_INLINE void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth)
1059 {
1060   PCRE2_SPTR next_alt;
1061 
1062   SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
1063 
1064   if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1065     return;
1066 
1067   next_alt = bracketend(cc) - (1 + LINK_SIZE);
1068   if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
1069     return;
1070 
1071   do
1072     {
1073     next_alt = cc + GET(cc, 1);
1074 
1075     cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1076 
1077     while (TRUE)
1078       {
1079       switch(*cc)
1080         {
1081         case OP_SOD:
1082         case OP_SOM:
1083         case OP_SET_SOM:
1084         case OP_NOT_WORD_BOUNDARY:
1085         case OP_WORD_BOUNDARY:
1086         case OP_EODN:
1087         case OP_EOD:
1088         case OP_CIRC:
1089         case OP_CIRCM:
1090         case OP_DOLL:
1091         case OP_DOLLM:
1092         /* Zero width assertions. */
1093         cc++;
1094         continue;
1095         }
1096       break;
1097       }
1098 
1099     if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
1100       detect_fast_fail(common, cc, private_data_start, depth - 1);
1101 
1102     if (is_accelerated_repeat(cc))
1103       {
1104       common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1105 
1106       if (common->fast_fail_start_ptr == 0)
1107         common->fast_fail_start_ptr = *private_data_start;
1108 
1109       *private_data_start += sizeof(sljit_sw);
1110       common->fast_fail_end_ptr = *private_data_start;
1111 
1112       if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1113         return;
1114       }
1115 
1116     cc = next_alt;
1117     }
1118   while (*cc == OP_ALT);
1119 }
1120 
/* Returns the number of private data words (0, 1 or 2) for the class repeat
opcode at cc; callers multiply the result by sizeof(sljit_sw).

  OP_CRSTAR, OP_CRPLUS                      -> 2
  OP_CRMINSTAR, OP_CRMINPLUS,
  OP_CRQUERY, OP_CRMINQUERY                 -> 1
  OP_CRRANGE, OP_CRMINRANGE with max == 0   -> 2 for OP_CRRANGE, otherwise 1
  OP_CRRANGE, OP_CRMINRANGE with max != 0   -> max - min, capped at 2
  anything else                             -> 0

For the range opcodes min is read with GET2(cc, 1) and max with
GET2(cc, 1 + IMM2_SIZE). */
static int get_class_iterator_size(PCRE2_SPTR cc)
{
sljit_u32 min;
sljit_u32 max;
switch(*cc)
  {
  case OP_CRSTAR:
  case OP_CRPLUS:
  return 2;

  case OP_CRMINSTAR:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  return 1;

  case OP_CRRANGE:
  case OP_CRMINRANGE:
  min = GET2(cc, 1);
  max = GET2(cc, 1 + IMM2_SIZE);
  /* NOTE(review): max == 0 presumably encodes "no upper limit" - confirm. */
  if (max == 0)
    return (*cc == OP_CRRANGE) ? 2 : 1;
  max -= min;
  if (max > 2)
    max = 2;
  return max;

  default:
  return 0;
  }
}
1152 
detect_repeat(compiler_common * common,PCRE2_SPTR begin)1153 static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
1154 {
1155 PCRE2_SPTR end = bracketend(begin);
1156 PCRE2_SPTR next;
1157 PCRE2_SPTR next_end;
1158 PCRE2_SPTR max_end;
1159 PCRE2_UCHAR type;
1160 sljit_sw length = end - begin;
1161 sljit_s32 min, max, i;
1162 
1163 /* Detect fixed iterations first. */
1164 if (end[-(1 + LINK_SIZE)] != OP_KET)
1165   return FALSE;
1166 
1167 /* Already detected repeat. */
1168 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
1169   return TRUE;
1170 
1171 next = end;
1172 min = 1;
1173 while (1)
1174   {
1175   if (*next != *begin)
1176     break;
1177   next_end = bracketend(next);
1178   if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1179     break;
1180   next = next_end;
1181   min++;
1182   }
1183 
1184 if (min == 2)
1185   return FALSE;
1186 
1187 max = 0;
1188 max_end = next;
1189 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1190   {
1191   type = *next;
1192   while (1)
1193     {
1194     if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1195       break;
1196     next_end = bracketend(next + 2 + LINK_SIZE);
1197     if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1198       break;
1199     next = next_end;
1200     max++;
1201     }
1202 
1203   if (next[0] == type && next[1] == *begin && max >= 1)
1204     {
1205     next_end = bracketend(next + 1);
1206     if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1207       {
1208       for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1209         if (*next_end != OP_KET)
1210           break;
1211 
1212       if (i == max)
1213         {
1214         common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1215         common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1216         /* +2 the original and the last. */
1217         common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1218         if (min == 1)
1219           return TRUE;
1220         min--;
1221         max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1222         }
1223       }
1224     }
1225   }
1226 
1227 if (min >= 3)
1228   {
1229   common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1230   common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1231   common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1232   return TRUE;
1233   }
1234 
1235 return FALSE;
1236 }
1237 
/* Groups of case labels for single character and character type iterators.
In set_private_data_ptrs() the _1 groups reserve one private data word and
the _2A groups up to two. The _2B groups hold the counted (UPTO) forms,
whose opcode is followed by an IMM2_SIZE long count. */

#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1289 
/* Walks the compiled pattern from common->start up to ccend and assigns
private data offsets, stored in common->private_data_ptrs and indexed by the
position of the opcode relative to common->start.

Offsets are handed out starting at *private_data_start in sizeof(sljit_sw)
steps; the first unused offset is written back to *private_data_start. The
walk stops early once the running offset exceeds SLJIT_MAX_LOCAL_SIZE.

Slots are given to:
  - an OP_KET for which detect_repeat() recorded a repeat (the repeated
    copies after it are then skipped);
  - assertions, once-only brackets and the other bracket types listed in the
    switch below;
  - an OP_COND whose first link leads to OP_KETRMAX or OP_KETRMIN;
  - character iterators which are not inside a repeated bracket, nor inside
    a bracket sequence converted by detect_repeat(). */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
PCRE2_SPTR cc = common->start;
PCRE2_SPTR alternative;
/* End of the region in which character iterators get no private slot;
NULL when there is no such region. */
PCRE2_SPTR end = NULL;
int private_data_ptr = *private_data_start;
/* space: number of words wanted by a character iterator.
size: distance to the next opcode; a negative value means that a character
follows the opcode, which may occupy extra code units in UTF mode.
bracketlen: length of the opening part of a bracket. */
int space, size, bracketlen;
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  space = 0;
  size = 0;
  bracketlen = 0;
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
    {
    if (detect_repeat(common, cc))
      {
      /* These brackets are converted to repeats, so no global
      based single character repeat is allowed. */
      if (cc >= end)
        end = bracketend(cc);
      }
    }
  /* The check is only suppressed for the bracket which directly follows
  an OP_BRAZERO / OP_BRAMINZERO / OP_BRAPOSZERO. */
  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      /* A repeat was recorded by detect_repeat(): reserve its slot and
      jump over the repeated copies. */
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    repeat_check = FALSE;
    size = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    space = 1;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    space = 2;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    space = 2;
    size = -(2 + IMM2_SIZE);
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    space = 1;
    size = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    size = 1;
    break;

    case OP_TYPEUPTO:
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    size = 1 + IMM2_SIZE;
    break;

    case OP_TYPEMINUPTO:
    space = 2;
    size = 1 + IMM2_SIZE;
    break;

    /* NOTE(review): in the class cases below size is still 0 when
    get_class_iterator_size() is called, so it inspects the class opcode
    itself and returns 0: class iterators never get a slot here. Kept as
    is because the code which consumes these slots is not visible from
    this function - confirm whether this is intended. */
    case OP_CLASS:
    case OP_NCLASS:
    space = get_class_iterator_size(cc + size);
    size = 1 + 32 / sizeof(PCRE2_UCHAR);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    space = get_class_iterator_size(cc + size);
    size = GET(cc, 1);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
     gets a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  if (size != 0)
    {
    if (size < 0)
      {
      cc += -size;
#ifdef SUPPORT_UNICODE
      /* Skip the remaining code units of a multi-unit character. */
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    if (cc >= end)
      {
      /* Entering a bracket outside of any tracked region: track it unless
      it ends with a plain OP_KET. */
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
*private_data_start = private_data_ptr;
}
1470 
1471 /* Returns with a frame_types (always < 0) if no need for frame. */
get_framesize(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,BOOL recursive,BOOL * needs_control_head)1472 static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
1473 {
1474 int length = 0;
1475 int possessive = 0;
1476 BOOL stack_restore = FALSE;
1477 BOOL setsom_found = recursive;
1478 BOOL setmark_found = recursive;
1479 /* The last capture is a local variable even for recursions. */
1480 BOOL capture_last_found = FALSE;
1481 
1482 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1483 SLJIT_ASSERT(common->control_head_ptr != 0);
1484 *needs_control_head = TRUE;
1485 #else
1486 *needs_control_head = FALSE;
1487 #endif
1488 
1489 if (ccend == NULL)
1490   {
1491   ccend = bracketend(cc) - (1 + LINK_SIZE);
1492   if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1493     {
1494     possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1495     /* This is correct regardless of common->capture_last_ptr. */
1496     capture_last_found = TRUE;
1497     }
1498   cc = next_opcode(common, cc);
1499   }
1500 
1501 SLJIT_ASSERT(cc != NULL);
1502 while (cc < ccend)
1503   switch(*cc)
1504     {
1505     case OP_SET_SOM:
1506     SLJIT_ASSERT(common->has_set_som);
1507     stack_restore = TRUE;
1508     if (!setsom_found)
1509       {
1510       length += 2;
1511       setsom_found = TRUE;
1512       }
1513     cc += 1;
1514     break;
1515 
1516     case OP_MARK:
1517     case OP_PRUNE_ARG:
1518     case OP_THEN_ARG:
1519     SLJIT_ASSERT(common->mark_ptr != 0);
1520     stack_restore = TRUE;
1521     if (!setmark_found)
1522       {
1523       length += 2;
1524       setmark_found = TRUE;
1525       }
1526     if (common->control_head_ptr != 0)
1527       *needs_control_head = TRUE;
1528     cc += 1 + 2 + cc[1];
1529     break;
1530 
1531     case OP_RECURSE:
1532     stack_restore = TRUE;
1533     if (common->has_set_som && !setsom_found)
1534       {
1535       length += 2;
1536       setsom_found = TRUE;
1537       }
1538     if (common->mark_ptr != 0 && !setmark_found)
1539       {
1540       length += 2;
1541       setmark_found = TRUE;
1542       }
1543     if (common->capture_last_ptr != 0 && !capture_last_found)
1544       {
1545       length += 2;
1546       capture_last_found = TRUE;
1547       }
1548     cc += 1 + LINK_SIZE;
1549     break;
1550 
1551     case OP_CBRA:
1552     case OP_CBRAPOS:
1553     case OP_SCBRA:
1554     case OP_SCBRAPOS:
1555     stack_restore = TRUE;
1556     if (common->capture_last_ptr != 0 && !capture_last_found)
1557       {
1558       length += 2;
1559       capture_last_found = TRUE;
1560       }
1561     length += 3;
1562     cc += 1 + LINK_SIZE + IMM2_SIZE;
1563     break;
1564 
1565     case OP_THEN:
1566     stack_restore = TRUE;
1567     if (common->control_head_ptr != 0)
1568       *needs_control_head = TRUE;
1569     cc ++;
1570     break;
1571 
1572     default:
1573     stack_restore = TRUE;
1574     /* Fall through. */
1575 
1576     case OP_NOT_WORD_BOUNDARY:
1577     case OP_WORD_BOUNDARY:
1578     case OP_NOT_DIGIT:
1579     case OP_DIGIT:
1580     case OP_NOT_WHITESPACE:
1581     case OP_WHITESPACE:
1582     case OP_NOT_WORDCHAR:
1583     case OP_WORDCHAR:
1584     case OP_ANY:
1585     case OP_ALLANY:
1586     case OP_ANYBYTE:
1587     case OP_NOTPROP:
1588     case OP_PROP:
1589     case OP_ANYNL:
1590     case OP_NOT_HSPACE:
1591     case OP_HSPACE:
1592     case OP_NOT_VSPACE:
1593     case OP_VSPACE:
1594     case OP_EXTUNI:
1595     case OP_EODN:
1596     case OP_EOD:
1597     case OP_CIRC:
1598     case OP_CIRCM:
1599     case OP_DOLL:
1600     case OP_DOLLM:
1601     case OP_CHAR:
1602     case OP_CHARI:
1603     case OP_NOT:
1604     case OP_NOTI:
1605 
1606     case OP_EXACT:
1607     case OP_POSSTAR:
1608     case OP_POSPLUS:
1609     case OP_POSQUERY:
1610     case OP_POSUPTO:
1611 
1612     case OP_EXACTI:
1613     case OP_POSSTARI:
1614     case OP_POSPLUSI:
1615     case OP_POSQUERYI:
1616     case OP_POSUPTOI:
1617 
1618     case OP_NOTEXACT:
1619     case OP_NOTPOSSTAR:
1620     case OP_NOTPOSPLUS:
1621     case OP_NOTPOSQUERY:
1622     case OP_NOTPOSUPTO:
1623 
1624     case OP_NOTEXACTI:
1625     case OP_NOTPOSSTARI:
1626     case OP_NOTPOSPLUSI:
1627     case OP_NOTPOSQUERYI:
1628     case OP_NOTPOSUPTOI:
1629 
1630     case OP_TYPEEXACT:
1631     case OP_TYPEPOSSTAR:
1632     case OP_TYPEPOSPLUS:
1633     case OP_TYPEPOSQUERY:
1634     case OP_TYPEPOSUPTO:
1635 
1636     case OP_CLASS:
1637     case OP_NCLASS:
1638     case OP_XCLASS:
1639 
1640     case OP_CALLOUT:
1641     case OP_CALLOUT_STR:
1642 
1643     cc = next_opcode(common, cc);
1644     SLJIT_ASSERT(cc != NULL);
1645     break;
1646     }
1647 
1648 /* Possessive quantifiers can use a special case. */
1649 if (SLJIT_UNLIKELY(possessive == length))
1650   return stack_restore ? no_frame : no_stack;
1651 
1652 if (length > 0)
1653   return length + 1;
1654 return stack_restore ? no_frame : no_stack;
1655 }
1656 
/* Emits the machine code which fills in a frame for the code between cc and
ccend. The opcodes are scanned the same way as in get_framesize(), so the
entries written here match the words counted there.

The frame is written relative to STACK_TOP, starting at STACK(stackpos) and
advancing by sizeof(sljit_sw) per word. Entry layout:
  - single value entry (start of match, mark, last capture): the negated
    offset of the saved location, followed by its current value;
  - capturing bracket: OVECTOR(offset), followed by the current values of
    OVECTOR(offset) and OVECTOR(offset + 1).
A zero word is stored after the last entry; the final position is asserted
to be STACK(stacktop).

When ccend is NULL, cc must point to the start of a bracket and the inside
of that bracket is scanned. If that bracket is OP_CBRAPOS or OP_SCBRAPOS and
recursive is FALSE, the scan begins at the bracket itself, so its own
entries are emitted as well. When recursive is TRUE the start of match and
mark entries are never emitted. */
static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop, BOOL recursive)
{
DEFINE_COMPILER;
BOOL setsom_found = recursive;
BOOL setmark_found = recursive;
/* The last capture is a local variable even for recursions. */
BOOL capture_last_found = FALSE;
int offset;

/* >= 1 + shortest item size (2) */
SLJIT_UNUSED_ARG(stacktop);
SLJIT_ASSERT(stackpos >= stacktop + 2);

stackpos = STACK(stackpos);
if (ccend == NULL)
  {
  ccend = bracketend(cc) - (1 + LINK_SIZE);
  if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
    cc = next_opcode(common, cc);
  }

SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    if (!setsom_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      setsom_found = TRUE;
      }
    cc += 1;
    break;

    case OP_MARK:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    if (!setmark_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      setmark_found = TRUE;
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_RECURSE:
    /* Save every single value entry which is in use and not saved yet. */
    if (common->has_set_som && !setsom_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      setsom_found = TRUE;
      }
    if (common->mark_ptr != 0 && !setmark_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      setmark_found = TRUE;
      }
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      capture_last_found = TRUE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      capture_last_found = TRUE;
      }
    /* Three words: the (positive) ovector offset and both ovector values
    of this capture. */
    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
    stackpos += (int)sizeof(sljit_sw);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
    stackpos += (int)sizeof(sljit_sw);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
    stackpos += (int)sizeof(sljit_sw);

    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

/* Closing zero word of the frame. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
SLJIT_ASSERT(stackpos == STACK(stacktop));
}
1778 
get_private_data_copy_length(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,BOOL needs_control_head)1779 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL needs_control_head)
1780 {
1781 int private_data_length = needs_control_head ? 3 : 2;
1782 int size;
1783 PCRE2_SPTR alternative;
1784 /* Calculate the sum of the private machine words. */
1785 while (cc < ccend)
1786   {
1787   size = 0;
1788   switch(*cc)
1789     {
1790     case OP_KET:
1791     if (PRIVATE_DATA(cc) != 0)
1792       {
1793       private_data_length++;
1794       SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1795       cc += PRIVATE_DATA(cc + 1);
1796       }
1797     cc += 1 + LINK_SIZE;
1798     break;
1799 
1800     case OP_ASSERT:
1801     case OP_ASSERT_NOT:
1802     case OP_ASSERTBACK:
1803     case OP_ASSERTBACK_NOT:
1804     case OP_ONCE:
1805     case OP_ONCE_NC:
1806     case OP_BRAPOS:
1807     case OP_SBRA:
1808     case OP_SBRAPOS:
1809     case OP_SCOND:
1810     private_data_length++;
1811     SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1812     cc += 1 + LINK_SIZE;
1813     break;
1814 
1815     case OP_CBRA:
1816     case OP_SCBRA:
1817     if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1818       private_data_length++;
1819     cc += 1 + LINK_SIZE + IMM2_SIZE;
1820     break;
1821 
1822     case OP_CBRAPOS:
1823     case OP_SCBRAPOS:
1824     private_data_length += 2;
1825     cc += 1 + LINK_SIZE + IMM2_SIZE;
1826     break;
1827 
1828     case OP_COND:
1829     /* Might be a hidden SCOND. */
1830     alternative = cc + GET(cc, 1);
1831     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1832       private_data_length++;
1833     cc += 1 + LINK_SIZE;
1834     break;
1835 
1836     CASE_ITERATOR_PRIVATE_DATA_1
1837     if (PRIVATE_DATA(cc))
1838       private_data_length++;
1839     cc += 2;
1840 #ifdef SUPPORT_UNICODE
1841     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1842 #endif
1843     break;
1844 
1845     CASE_ITERATOR_PRIVATE_DATA_2A
1846     if (PRIVATE_DATA(cc))
1847       private_data_length += 2;
1848     cc += 2;
1849 #ifdef SUPPORT_UNICODE
1850     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1851 #endif
1852     break;
1853 
1854     CASE_ITERATOR_PRIVATE_DATA_2B
1855     if (PRIVATE_DATA(cc))
1856       private_data_length += 2;
1857     cc += 2 + IMM2_SIZE;
1858 #ifdef SUPPORT_UNICODE
1859     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1860 #endif
1861     break;
1862 
1863     CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1864     if (PRIVATE_DATA(cc))
1865       private_data_length++;
1866     cc += 1;
1867     break;
1868 
1869     CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1870     if (PRIVATE_DATA(cc))
1871       private_data_length += 2;
1872     cc += 1;
1873     break;
1874 
1875     CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1876     if (PRIVATE_DATA(cc))
1877       private_data_length += 2;
1878     cc += 1 + IMM2_SIZE;
1879     break;
1880 
1881     case OP_CLASS:
1882     case OP_NCLASS:
1883 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
1884     case OP_XCLASS:
1885     size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
1886 #else
1887     size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
1888 #endif
1889     if (PRIVATE_DATA(cc))
1890       private_data_length += get_class_iterator_size(cc + size);
1891     cc += size;
1892     break;
1893 
1894     default:
1895     cc = next_opcode(common, cc);
1896     SLJIT_ASSERT(cc != NULL);
1897     break;
1898     }
1899   }
1900 SLJIT_ASSERT(cc == ccend);
1901 return private_data_length;
1902 }
1903 
copy_private_data(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,BOOL save,int stackptr,int stacktop,BOOL needs_control_head)1904 static void copy_private_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
1905   BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1906 {
1907 DEFINE_COMPILER;
1908 int srcw[2];
1909 int count, size;
1910 BOOL tmp1next = TRUE;
1911 BOOL tmp1empty = TRUE;
1912 BOOL tmp2empty = TRUE;
1913 PCRE2_SPTR alternative;
1914 enum {
1915   start,
1916   loop,
1917   end
1918 } status;
1919 
1920 status = save ? start : loop;
1921 stackptr = STACK(stackptr - 2);
1922 stacktop = STACK(stacktop - 1);
1923 
1924 if (!save)
1925   {
1926   stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1927   if (stackptr < stacktop)
1928     {
1929     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1930     stackptr += sizeof(sljit_sw);
1931     tmp1empty = FALSE;
1932     }
1933   if (stackptr < stacktop)
1934     {
1935     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1936     stackptr += sizeof(sljit_sw);
1937     tmp2empty = FALSE;
1938     }
1939   /* The tmp1next must be TRUE in either way. */
1940   }
1941 
1942 do
1943   {
1944   count = 0;
1945   switch(status)
1946     {
1947     case start:
1948     SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1949     count = 1;
1950     srcw[0] = common->recursive_head_ptr;
1951     if (needs_control_head)
1952       {
1953       SLJIT_ASSERT(common->control_head_ptr != 0);
1954       count = 2;
1955       srcw[1] = common->control_head_ptr;
1956       }
1957     status = loop;
1958     break;
1959 
1960     case loop:
1961     if (cc >= ccend)
1962       {
1963       status = end;
1964       break;
1965       }
1966 
1967     switch(*cc)
1968       {
1969       case OP_KET:
1970       if (PRIVATE_DATA(cc) != 0)
1971         {
1972         count = 1;
1973         srcw[0] = PRIVATE_DATA(cc);
1974         SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1975         cc += PRIVATE_DATA(cc + 1);
1976         }
1977       cc += 1 + LINK_SIZE;
1978       break;
1979 
1980       case OP_ASSERT:
1981       case OP_ASSERT_NOT:
1982       case OP_ASSERTBACK:
1983       case OP_ASSERTBACK_NOT:
1984       case OP_ONCE:
1985       case OP_ONCE_NC:
1986       case OP_BRAPOS:
1987       case OP_SBRA:
1988       case OP_SBRAPOS:
1989       case OP_SCOND:
1990       count = 1;
1991       srcw[0] = PRIVATE_DATA(cc);
1992       SLJIT_ASSERT(srcw[0] != 0);
1993       cc += 1 + LINK_SIZE;
1994       break;
1995 
1996       case OP_CBRA:
1997       case OP_SCBRA:
1998       if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1999         {
2000         count = 1;
2001         srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
2002         }
2003       cc += 1 + LINK_SIZE + IMM2_SIZE;
2004       break;
2005 
2006       case OP_CBRAPOS:
2007       case OP_SCBRAPOS:
2008       count = 2;
2009       srcw[0] = PRIVATE_DATA(cc);
2010       srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
2011       SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
2012       cc += 1 + LINK_SIZE + IMM2_SIZE;
2013       break;
2014 
2015       case OP_COND:
2016       /* Might be a hidden SCOND. */
2017       alternative = cc + GET(cc, 1);
2018       if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2019         {
2020         count = 1;
2021         srcw[0] = PRIVATE_DATA(cc);
2022         SLJIT_ASSERT(srcw[0] != 0);
2023         }
2024       cc += 1 + LINK_SIZE;
2025       break;
2026 
2027       CASE_ITERATOR_PRIVATE_DATA_1
2028       if (PRIVATE_DATA(cc))
2029         {
2030         count = 1;
2031         srcw[0] = PRIVATE_DATA(cc);
2032         }
2033       cc += 2;
2034 #ifdef SUPPORT_UNICODE
2035       if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2036 #endif
2037       break;
2038 
2039       CASE_ITERATOR_PRIVATE_DATA_2A
2040       if (PRIVATE_DATA(cc))
2041         {
2042         count = 2;
2043         srcw[0] = PRIVATE_DATA(cc);
2044         srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2045         }
2046       cc += 2;
2047 #ifdef SUPPORT_UNICODE
2048       if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2049 #endif
2050       break;
2051 
2052       CASE_ITERATOR_PRIVATE_DATA_2B
2053       if (PRIVATE_DATA(cc))
2054         {
2055         count = 2;
2056         srcw[0] = PRIVATE_DATA(cc);
2057         srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2058         }
2059       cc += 2 + IMM2_SIZE;
2060 #ifdef SUPPORT_UNICODE
2061       if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2062 #endif
2063       break;
2064 
2065       CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2066       if (PRIVATE_DATA(cc))
2067         {
2068         count = 1;
2069         srcw[0] = PRIVATE_DATA(cc);
2070         }
2071       cc += 1;
2072       break;
2073 
2074       CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2075       if (PRIVATE_DATA(cc))
2076         {
2077         count = 2;
2078         srcw[0] = PRIVATE_DATA(cc);
2079         srcw[1] = srcw[0] + sizeof(sljit_sw);
2080         }
2081       cc += 1;
2082       break;
2083 
2084       CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2085       if (PRIVATE_DATA(cc))
2086         {
2087         count = 2;
2088         srcw[0] = PRIVATE_DATA(cc);
2089         srcw[1] = srcw[0] + sizeof(sljit_sw);
2090         }
2091       cc += 1 + IMM2_SIZE;
2092       break;
2093 
2094       case OP_CLASS:
2095       case OP_NCLASS:
2096 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
2097       case OP_XCLASS:
2098       size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2099 #else
2100       size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2101 #endif
2102       if (PRIVATE_DATA(cc))
2103         switch(get_class_iterator_size(cc + size))
2104           {
2105           case 1:
2106           count = 1;
2107           srcw[0] = PRIVATE_DATA(cc);
2108           break;
2109 
2110           case 2:
2111           count = 2;
2112           srcw[0] = PRIVATE_DATA(cc);
2113           srcw[1] = srcw[0] + sizeof(sljit_sw);
2114           break;
2115 
2116           default:
2117           SLJIT_ASSERT_STOP();
2118           break;
2119           }
2120       cc += size;
2121       break;
2122 
2123       default:
2124       cc = next_opcode(common, cc);
2125       SLJIT_ASSERT(cc != NULL);
2126       break;
2127       }
2128     break;
2129 
2130     case end:
2131     SLJIT_ASSERT_STOP();
2132     break;
2133     }
2134 
2135   while (count > 0)
2136     {
2137     count--;
2138     if (save)
2139       {
2140       if (tmp1next)
2141         {
2142         if (!tmp1empty)
2143           {
2144           OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2145           stackptr += sizeof(sljit_sw);
2146           }
2147         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2148         tmp1empty = FALSE;
2149         tmp1next = FALSE;
2150         }
2151       else
2152         {
2153         if (!tmp2empty)
2154           {
2155           OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2156           stackptr += sizeof(sljit_sw);
2157           }
2158         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2159         tmp2empty = FALSE;
2160         tmp1next = TRUE;
2161         }
2162       }
2163     else
2164       {
2165       if (tmp1next)
2166         {
2167         SLJIT_ASSERT(!tmp1empty);
2168         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
2169         tmp1empty = stackptr >= stacktop;
2170         if (!tmp1empty)
2171           {
2172           OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2173           stackptr += sizeof(sljit_sw);
2174           }
2175         tmp1next = FALSE;
2176         }
2177       else
2178         {
2179         SLJIT_ASSERT(!tmp2empty);
2180         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
2181         tmp2empty = stackptr >= stacktop;
2182         if (!tmp2empty)
2183           {
2184           OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2185           stackptr += sizeof(sljit_sw);
2186           }
2187         tmp1next = TRUE;
2188         }
2189       }
2190     }
2191   }
2192 while (status != end);
2193 
2194 if (save)
2195   {
2196   if (tmp1next)
2197     {
2198     if (!tmp1empty)
2199       {
2200       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2201       stackptr += sizeof(sljit_sw);
2202       }
2203     if (!tmp2empty)
2204       {
2205       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2206       stackptr += sizeof(sljit_sw);
2207       }
2208     }
2209   else
2210     {
2211     if (!tmp2empty)
2212       {
2213       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2214       stackptr += sizeof(sljit_sw);
2215       }
2216     if (!tmp1empty)
2217       {
2218       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2219       stackptr += sizeof(sljit_sw);
2220       }
2221     }
2222   }
2223 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
2224 }
2225 
set_then_offsets(compiler_common * common,PCRE2_SPTR cc,sljit_u8 * current_offset)2226 static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
2227 {
2228 PCRE2_SPTR end = bracketend(cc);
2229 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2230 
2231 /* Assert captures then. */
2232 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2233   current_offset = NULL;
2234 /* Conditional block does not. */
2235 if (*cc == OP_COND || *cc == OP_SCOND)
2236   has_alternatives = FALSE;
2237 
2238 cc = next_opcode(common, cc);
2239 if (has_alternatives)
2240   current_offset = common->then_offsets + (cc - common->start);
2241 
2242 while (cc < end)
2243   {
2244   if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2245     cc = set_then_offsets(common, cc, current_offset);
2246   else
2247     {
2248     if (*cc == OP_ALT && has_alternatives)
2249       current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2250     if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2251       *current_offset = 1;
2252     cc = next_opcode(common, cc);
2253     }
2254   }
2255 
2256 return end;
2257 }
2258 
2259 #undef CASE_ITERATOR_PRIVATE_DATA_1
2260 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2261 #undef CASE_ITERATOR_PRIVATE_DATA_2B
2262 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2263 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2264 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2265 
is_powerof2(unsigned int value)2266 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2267 {
2268 return (value & (value - 1)) == 0;
2269 }
2270 
set_jumps(jump_list * list,struct sljit_label * label)2271 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2272 {
2273 while (list)
2274   {
2275   /* sljit_set_label is clever enough to do nothing
2276   if either the jump or the label is NULL. */
2277   SET_LABEL(list->jump, label);
2278   list = list->next;
2279   }
2280 }
2281 
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)2282 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2283 {
2284 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2285 if (list_item)
2286   {
2287   list_item->next = *list;
2288   list_item->jump = jump;
2289   *list = list_item;
2290   }
2291 }
2292 
/* Records a stub: the jump that enters it (start) and a label emitted at the
current position (quit) where execution continues afterwards. The record is
pushed onto common->stubs; nothing is recorded if the allocation fails. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (stub == NULL)
  return;

stub->start = start;
stub->quit = LABEL();
stub->next = common->stubs;
common->stubs = stub;
}
2306 
/* Emits the code of every pending stub: the entry jump lands here, a fast call
is queued on common->stackalloc, and control then jumps back to the stub's
quit label. The pending list is emptied afterwards. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }
common->stubs = NULL;
}
2321 
/* Emits a label at the current position and records it together with
update_addr on the common->label_addrs list. Does nothing if the list node
cannot be allocated. */
static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
{
DEFINE_COMPILER;
label_addr_list *entry = sljit_alloc_memory(compiler, sizeof(label_addr_list));

if (entry != NULL)
  {
  entry->label = LABEL();
  entry->update_addr = update_addr;
  entry->next = common->label_addrs;
  common->label_addrs = entry;
  }
}
2335 
count_match(compiler_common * common)2336 static SLJIT_INLINE void count_match(compiler_common *common)
2337 {
2338 DEFINE_COMPILER;
2339 
2340 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2341 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2342 }
2343 
allocate_stack(compiler_common * common,int size)2344 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2345 {
2346 /* May destroy all locals and registers except TMP2. */
2347 DEFINE_COMPILER;
2348 
2349 SLJIT_ASSERT(size > 0);
2350 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2351 #ifdef DESTROY_REGISTERS
2352 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2353 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2354 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2355 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2356 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2357 #endif
2358 add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2359 }
2360 
free_stack(compiler_common * common,int size)2361 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2362 {
2363 DEFINE_COMPILER;
2364 
2365 SLJIT_ASSERT(size > 0);
2366 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2367 }
2368 
/* Allocates size bytes plus one leading sljit_uw that links the chunk into
the common->read_only_data_head list. Returns a pointer just past the link
word, or NULL if the compiler is already in an error state or the allocation
fails (in which case the compiler memory error is set). */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *chunk;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

chunk = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
if (chunk != NULL)
  {
  /* The first word stores the previous list head. */
  *(void**)chunk = common->read_only_data_head;
  common->read_only_data_head = (void *)chunk;
  return chunk + 1;
  }

sljit_set_compiler_memory_error(compiler);
return NULL;
}
2388 
reset_ovector(compiler_common * common,int length)2389 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2390 {
2391 DEFINE_COMPILER;
2392 struct sljit_label *loop;
2393 sljit_s32 i;
2394 
2395 /* At this point we can freely use all temporary registers. */
2396 SLJIT_ASSERT(length > 1);
2397 /* TMP1 returns with begin - 1. */
2398 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2399 if (length < 8)
2400   {
2401   for (i = 1; i < length; i++)
2402     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2403   }
2404 else
2405   {
2406   GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2407   OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2408   loop = LABEL();
2409   OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2410   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2411   JUMPTO(SLJIT_NOT_ZERO, loop);
2412   }
2413 }
2414 
reset_fast_fail(compiler_common * common)2415 static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
2416 {
2417 DEFINE_COMPILER;
2418 sljit_s32 i;
2419 
2420 SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
2421 
2422 OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2423 for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
2424   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
2425 }
2426 
do_reset_match(compiler_common * common,int length)2427 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2428 {
2429 DEFINE_COMPILER;
2430 struct sljit_label *loop;
2431 int i;
2432 
2433 SLJIT_ASSERT(length > 1);
2434 /* OVECTOR(1) contains the "string begin - 1" constant. */
2435 if (length > 2)
2436   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2437 if (length < 8)
2438   {
2439   for (i = 2; i < length; i++)
2440     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2441   }
2442 else
2443   {
2444   GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2445   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2446   loop = LABEL();
2447   OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2448   OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2449   JUMPTO(SLJIT_NOT_ZERO, loop);
2450   }
2451 
2452 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2453 if (common->mark_ptr != 0)
2454   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2455 if (common->control_head_ptr != 0)
2456   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2457 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2458 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2459 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2460 }
2461 
do_search_mark(sljit_sw * current,PCRE2_SPTR skip_arg)2462 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
2463 {
2464 while (current != NULL)
2465   {
2466   switch (current[-2])
2467     {
2468     case type_then_trap:
2469     break;
2470 
2471     case type_mark:
2472     if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[-3]) == 0)
2473       return current[-4];
2474     break;
2475 
2476     default:
2477     SLJIT_ASSERT_STOP();
2478     break;
2479     }
2480   SLJIT_ASSERT(current > (sljit_sw*)current[-1]);
2481   current = (sljit_sw*)current[-1];
2482   }
2483 return -1;
2484 }
2485 
copy_ovector(compiler_common * common,int topbracket)2486 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2487 {
2488 DEFINE_COMPILER;
2489 struct sljit_label *loop;
2490 
2491 /* At this point we can freely use all registers. */
2492 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2493 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2494 
2495 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2496 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2497 if (common->mark_ptr != 0)
2498   OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2499 OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
2500 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
2501 if (common->mark_ptr != 0)
2502   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
2503 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
2504   SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
2505 
2506 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2507 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2508 
2509 loop = LABEL();
2510 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2511 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2512 /* Copy the integer value to the output buffer */
2513 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
2514 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2515 #endif
2516 SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
2517 if (sizeof(PCRE2_SIZE) == 4)
2518   OP1(SLJIT_MOVU_U32, SLJIT_MEM1(SLJIT_R2), sizeof(PCRE2_SIZE), SLJIT_S1, 0);
2519 else
2520   OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R2), sizeof(PCRE2_SIZE), SLJIT_S1, 0);
2521 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2522 JUMPTO(SLJIT_NOT_ZERO, loop);
2523 
2524 /* Calculate the return value, which is the maximum ovector value. */
2525 if (topbracket > 1)
2526   {
2527   GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2528   OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2529 
2530   /* OVECTOR(0) is never equal to SLJIT_S2. */
2531   loop = LABEL();
2532   OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2533   OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2534   CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2535   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2536   }
2537 else
2538   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2539 }
2540 
return_with_partial_match(compiler_common * common,struct sljit_label * quit)2541 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2542 {
2543 DEFINE_COMPILER;
2544 sljit_s32 mov_opcode;
2545 
2546 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2547 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2548   && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));
2549 
2550 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2551 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
2552   common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
2553 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);
2554 
2555 /* Store match begin and end. */
2556 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2557 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
2558 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, match_data));
2559 
2560 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
2561 
2562 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2563 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
2564 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2565 #endif
2566 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);
2567 
2568 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S0, 0);
2569 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
2570 OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
2571 #endif
2572 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);
2573 
2574 JUMPTO(SLJIT_JUMP, quit);
2575 }
2576 
check_start_used_ptr(compiler_common * common)2577 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2578 {
2579 /* May destroy TMP1. */
2580 DEFINE_COMPILER;
2581 struct sljit_jump *jump;
2582 
2583 if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
2584   {
2585   /* The value of -1 must be kept for start_used_ptr! */
2586   OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2587   /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2588   is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2589   jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2590   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2591   JUMPHERE(jump);
2592   }
2593 else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
2594   {
2595   jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2596   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2597   JUMPHERE(jump);
2598   }
2599 }
2600 
char_has_othercase(compiler_common * common,PCRE2_SPTR cc)2601 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
2602 {
2603 /* Detects if the character has an othercase. */
2604 unsigned int c;
2605 
2606 #ifdef SUPPORT_UNICODE
2607 if (common->utf)
2608   {
2609   GETCHAR(c, cc);
2610   if (c > 127)
2611     {
2612     return c != UCD_OTHERCASE(c);
2613     }
2614 #if PCRE2_CODE_UNIT_WIDTH != 8
2615   return common->fcc[c] != c;
2616 #endif
2617   }
2618 else
2619 #endif
2620   c = *cc;
2621 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2622 }
2623 
char_othercase(compiler_common * common,unsigned int c)2624 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2625 {
2626 /* Returns with the othercase. */
2627 #ifdef SUPPORT_UNICODE
2628 if (common->utf && c > 127)
2629   {
2630   return UCD_OTHERCASE(c);
2631   }
2632 #endif
2633 return TABLE_GET(c, common->fcc, c);
2634 }
2635 
/* Detects whether the character at cc and its other case differ in exactly
one bit. Returns 0 if they do not. Otherwise the result is (index << 8) | mask,
where mask is the differing bit within a single byte and index selects which
byte (or code-unit half) of the encoded character carries it. */
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
unsigned int c, oc, bit;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int n;
#endif

#ifdef SUPPORT_UNICODE
if (common->utf)
  {
  GETCHAR(c, cc);
  oc = (c <= 127) ? common->fcc[c] : UCD_OTHERCASE(c);
  }
else
#endif
  {
  c = *cc;
  oc = TABLE_GET(c, common->fcc, c);
  }

/* Callers must only pass characters that do have an other case. */
SLJIT_ASSERT(c != oc);

bit = c ^ oc;
/* Optimized for English alphabet. */
if (c <= 127 && bit == 0x20)
  return (0 << 8) | 0x20;

/* Since c != oc, they must have at least 1 bit difference. */
if (!is_powerof2(bit))
  return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
if (common->utf && c > 127)
  {
  /* Each trailing byte carries six bits of the character: step back one
  byte for every six-bit group that does not contain the differing bit. */
  for (n = GET_EXTRALEN(*cc); (bit & 0x3f) == 0; bit >>= 6)
    n--;
  return (n << 8) | bit;
  }
#endif /* SUPPORT_UNICODE */
return (0 << 8) | bit;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
if (common->utf && c > 65535)
  {
  if (bit < (1 << 10))
    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
  /* The differing bit is above the low ten bits of the character. */
  bit >>= 10;
  }
#endif /* SUPPORT_UNICODE */
return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
2707 
/* Emits the check for a partial match. Nothing is emitted in
PCRE2_JIT_COMPLETE mode. Without force, the emitted code skips the action when
start_used_ptr >= STR_PTR; with force in soft partial mode it skips the action
when start_used_ptr is -1. The action is storing 0 into hit_start (soft mode)
or jumping to the partial match exit (hard mode). Only memory operands and
immediates are used, so no register is modified. */
static void check_partial(compiler_common *common, BOOL force)
{
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;
const BOOL soft = (common->mode == PCRE2_JIT_PARTIAL_SOFT);

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

if (force)
  {
  if (soft)
    skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  }
else
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

if (soft)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  }
else if (common->partialmatchlabel == NULL)
  {
  /* The exit label does not exist yet: queue the jump for later binding. */
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  }
else
  {
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }

if (skip != NULL)
  JUMPHERE(skip);
}
2737 
/* Emits the end-of-subject check. Jumps taken when the end is reached are
added to end_reached. In the partial modes the emitted code additionally
records the hit (soft mode: hit_start is set to 0) or leaves through the
partial match exit (hard mode) when start_used_ptr < STR_PTR.
Does not affect registers. Usually used in a tight spot. */
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
DEFINE_COMPILER;
struct sljit_jump *in_subject;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

in_subject = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
/* Both partial modes start with the same start_used_ptr test. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  {
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
else
  {
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  }
JUMPHERE(in_subject);
}
2767 
/* Emits the end-of-subject test used before reading a character. When the
end is reached the emitted code jumps to the backtracks list; in the partial
modes it first records the hit (soft mode) or leaves through the partial match
exit (hard mode) if start_used_ptr < STR_PTR. */
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
DEFINE_COMPILER;
struct sljit_jump *has_input;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
has_input = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode != PCRE2_JIT_PARTIAL_SOFT)
  {
  /* Hard partial mode: leave immediately through the partial match exit. */
  if (common->partialmatchlabel == NULL)
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  else
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
else
  {
  /* Soft partial mode: remember the hit and backtrack. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
JUMPHERE(has_input);
}
2796 
/* Emits code that reads the character at STR_PTR into TMP1 without moving
STR_PTR. Does not check STR_END. TMP2 is destroyed.

max is an upper bound on the character values the caller is interested in:
when it is below the first value that needs more than one code unit (128 for
UTF-8, 0xd800 for UTF-16) only the single code unit is loaded. */
static void peek_char(compiler_common *common, sljit_u32 max)
{
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif

SLJIT_UNUSED_ARG(max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  /* Decode through the shared helper, then undo the STR_PTR movement
  using the value the helper leaves in TMP2. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  /* TMP2 contains the high surrogate. STR_PTR still points at it (this
  function never advances STR_PTR), so the low surrogate is the NEXT code
  unit. Reading offset 0 here would combine the high surrogate with itself. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif
}
2839 
2840 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
2841 
/* Tells whether the character codes below 128 are enough to determine a
match: bytes 16..31 of the 32 byte bitset (the bits of characters 128..255)
must all be 0, or all be 0xff when nclass is set. */
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
const sljit_u8 expected = nclass ? 0xff : 0;
int i;

for (i = 16; i < 32; i++)
  {
  if (bitset[i] != expected)
    return FALSE;
  }
return TRUE;
}
2858 
/* Emits code that loads the code unit at STR_PTR, advances STR_PTR by one
and puts the common->ctypes entry of that code unit into TMP1. This is the
precise character type for characters below 128. Does not check STR_END;
TMP2 is destroyed.

When full_read is set, a lead byte of 0xc0 or above additionally advances
STR_PTR by the value found for it in utf8_table4. */
static void read_char7_type(compiler_common *common, BOOL full_read)
{
DEFINE_COMPILER;
struct sljit_jump *single_byte;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (!full_read)
  return;

single_byte = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
/* The table is indexed by lead byte minus 0xc0. */
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
JUMPHERE(single_byte);
}
2882 
2883 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
2884 
/* Emits code that loads the code unit at STR_PTR into TMP1 and advances
STR_PTR past it. In UTF-8 and UTF-16 builds with common->utf set, further
decoding is emitted depending on the [min, max] range the caller is interested
in and on update_str_ptr, which requests that STR_PTR is moved past the whole
character even when its value is not decoded. TMP2 is used as scratch, and
RETURN_ADDR as well on the UTF-8 paths that honour update_str_ptr. */
read_char_range(compiler_common * common,sljit_u32 min,sljit_u32 max,BOOL update_str_ptr)2885 static void read_char_range(compiler_common *common, sljit_u32 min, sljit_u32 max, BOOL update_str_ptr)
2886 {
2887 /* Reads the precise value of a character into TMP1, if the character is
2888 between min and max (c >= min && c <= max). Otherwise it returns with a value
2889 outside the range. Does not check STR_END. */
2890 DEFINE_COMPILER;
2891 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
2892 struct sljit_jump *jump;
2893 #endif
2894 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
2895 struct sljit_jump *jump2;
2896 #endif
2897 
2898 SLJIT_UNUSED_ARG(update_str_ptr);
2899 SLJIT_UNUSED_ARG(min);
2900 SLJIT_UNUSED_ARG(max);
2901 SLJIT_ASSERT(min <= max);
2902 
/* Common part: first code unit into TMP1, STR_PTR moved past it. */
2903 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2904 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2905 
2906 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
2907 if (common->utf)
2908   {
2909   if (max < 128 && !update_str_ptr) return;
2910 
/* Bytes below 0xc0 need no further processing. */
2911   jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2912   if (min >= 0x10000)
2913     {
/* Inline decode of a lead byte in 0xf0..0xf7 and the three bytes that
follow it; other lead bytes skip the decode through jump2. */
2914     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2915     if (update_str_ptr)
2916       OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2917     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2918     jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2919     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2920     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2921     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2922     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2923     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2924     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2925     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2926     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2927     if (!update_str_ptr)
2928       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2929     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2930     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2931     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2932     JUMPHERE(jump2);
2933     if (update_str_ptr)
2934       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2935     }
2936   else if (min >= 0x800 && max <= 0xffff)
2937     {
/* Inline decode of a lead byte in 0xe0..0xef and the two bytes that
follow it; other lead bytes skip the decode through jump2. */
2938     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2939     if (update_str_ptr)
2940       OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2941     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2942     jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2943     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2944     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2945     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2946     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2947     if (!update_str_ptr)
2948       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2949     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2950     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2951     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2952     JUMPHERE(jump2);
2953     if (update_str_ptr)
2954       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2955     }
2956   else if (max >= 0x800)
/* General case: fast call queued on the utfreadchar16 or utfreadchar list. */
2957     add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2958   else if (max < 128)
2959     {
/* Only reachable with update_str_ptr set (see the early return above):
STR_PTR is advanced by the table value, the character is not decoded. */
2960     OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2961     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2962     }
2963   else
2964     {
/* max is in 128..0x7ff here: inline decode using one following byte. */
2965     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2966     if (!update_str_ptr)
2967       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2968     else
2969       OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2970     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2971     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2972     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2973     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2974     if (update_str_ptr)
2975       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2976     }
2977   JUMPHERE(jump);
2978   }
2979 #endif
2980 
2981 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
2982 if (common->utf)
2983   {
2984   if (max >= 0x10000)
2985     {
/* Combine a unit in 0xd800..0xdbff with the unit that follows it.
STR_PTR already points at that second unit, hence offset 0. */
2986     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2987     jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2988     /* TMP2 contains the high surrogate. */
2989     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2990     OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2991     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2992     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2993     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2994     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2995     JUMPHERE(jump);
2996     return;
2997     }
2998 
2999   if (max < 0xd800 && !update_str_ptr) return;
3000 
3001   /* Skip low surrogate if necessary. */
3002   OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3003   jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
3004   if (update_str_ptr)
3005     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* The pair is not decoded; TMP1 is set to 0x10000, which lies above max. */
3006   if (max >= 0xd800)
3007     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
3008   JUMPHERE(jump);
3009   }
3010 #endif
3011 }
3012 
/* Emits code that reads one character without restricting its value range.
It simply forwards to read_char_range() with the bounds 0 and READ_CHAR_MAX
and TRUE as the last argument (presumably min, max and update_str_ptr, as
those are the names used inside read_char_range - confirm against its
signature). */
read_char(compiler_common * common)3013 static SLJIT_INLINE void read_char(compiler_common *common)
3014 {
3015 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
3016 }
3017 
/* Emits code that loads the ctypes[] entry of the character at STR_PTR into
TMP1. Only code points below 256 have an entry; for anything larger the
emitted code leaves 0 in TMP1. TMP2 is used as scratch. When update_str_ptr
is FALSE the emitted code is allowed to leave STR_PTR inside a multi-unit
character (only the units it actually read are skipped). */
read_char8_type(compiler_common * common,BOOL update_str_ptr)3018 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
3019 {
3020 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
3021 DEFINE_COMPILER;
3022 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
3023 struct sljit_jump *jump;
3024 #endif
3025 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
3026 struct sljit_jump *jump2;
3027 #endif
3028 
     /* update_str_ptr is not referenced in every build configuration. */
3029 SLJIT_UNUSED_ARG(update_str_ptr);
3030 
     /* TMP2 = first code unit; STR_PTR moves past it. */
3031 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
3032 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3033 
3034 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
3035 if (common->utf)
3036   {
3037   /* This can be an extra read in some situations, but hopefully
3038   it is needed in most cases. */
3039   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
       /* Bytes below 0xc0 are treated as complete characters: TMP1 is already final. */
3040   jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
3041   if (!update_str_ptr)
3042     {
         /* Inline path: combine the lead byte with the next byte as if this
         were a two-byte sequence. A lead byte of 0xc4 or above always
         produces a value above 255 here, so TMP1 stays 0 for those. */
3043     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3044     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3045     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3046     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3047     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3048     OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3049     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3050     jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
3051     OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3052     JUMPHERE(jump2);
3053     }
3054   else
         /* Out-of-line path: call the shared utfreadtype8 helper. */
3055     add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
3056   JUMPHERE(jump);
3057   return;
3058   }
3059 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
3060 
3061 #if PCRE2_CODE_UNIT_WIDTH != 8
3062 /* The ctypes array contains only 256 values. */
3063 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3064 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
3065 #endif
3066 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3067 #if PCRE2_CODE_UNIT_WIDTH != 8
3068 JUMPHERE(jump);
3069 #endif
3070 
3071 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
3072 if (common->utf && update_str_ptr)
3073   {
3074   /* Skip low surrogate if necessary. */
       /* TMP2 - 0xd800 is in 0..0x3ff exactly when TMP2 is in 0xd800..0xdbff;
       only then is one more code unit skipped. */
3075   OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
3076   jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
3077   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3078   JUMPHERE(jump);
3079   }
3080 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
3081 }
3082 
/* Emits code that moves STR_PTR back by one character. Without UTF (or with
32-bit code units) that is exactly one code unit; in UTF-8 and UTF-16 mode
the emitted code also steps over the trailing units of a multi-unit
character. TMP1 is used as scratch. */
skip_char_back(compiler_common * common)3083 static void skip_char_back(compiler_common *common)
3084 {
3085 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
3086 DEFINE_COMPILER;
3087 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3088 #if PCRE2_CODE_UNIT_WIDTH == 8
3089 struct sljit_label *label;
3090 
3091 if (common->utf)
3092   {
       /* Loop: step back one byte at a time while the byte just stepped over
       has the form 10xxxxxx ((byte & 0xc0) == 0x80). */
3093   label = LABEL();
3094   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
3095   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3096   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
3097   CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
3098   return;
3099   }
3100 #elif PCRE2_CODE_UNIT_WIDTH == 16
3101 if (common->utf)
3102   {
3103   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
3104   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3105   /* Skip low surrogate if necessary. */
       /* Branch-free: TMP1 becomes 1 when the unit is in 0xdc00..0xdfff and 0
       otherwise; shifted left by one it is the extra byte count to go back. */
3106   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3107   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
3108   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3109   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3110   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3111   return;
3112   }
3113 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
3114 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
     /* Fixed-width case: one character is one code unit. */
3115 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3116 }
3117 
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Emits a newline test for the character held in TMP1. The generated jumps
are appended to *backtracks: they are taken when the character is a newline
if jumpifmatch is TRUE, and when it is not a newline otherwise. TMP2 may be
destroyed by the emitted code. */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The shared anynewline helper performs the test; the jump emitted after
  the call branches on the zero flag (not-zero is the "newline" outcome). */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  if (jumpifmatch)
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  else
    add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
  return;
  }

if (nltype == NLTYPE_ANYCRLF)
  {
  if (!jumpifmatch)
    {
    /* CR skips the second test; anything that is not NL either backtracks. */
    is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
    JUMPHERE(is_cr);
    return;
    }

  /* Either CR or NL counts as a match. */
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  return;
  }

/* Remaining case: a fixed newline that fits in a single code unit. */
SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
if (jumpifmatch)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
else
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
}
3149 
3150 #ifdef SUPPORT_UNICODE
3151 
3152 #if PCRE2_CODE_UNIT_WIDTH == 8
/* Emits the out-of-line helper that decodes a multi-byte UTF-8 character.
On entry TMP1 holds the first byte and the remaining bytes are read starting
at STR_PTR. On return TMP1 holds the decoded value, TMP2 the length set by
IN_UCHARS(2), IN_UCHARS(3) or IN_UCHARS(4), and STR_PTR has been advanced by
1, 2 or 3 code units respectively. The helper is entered and left with the
sljit fast-call mechanism, using RETURN_ADDR for the return address. */
do_utfreadchar(compiler_common * common)3153 static void do_utfreadchar(compiler_common *common)
3154 {
3155 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
3156 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
3157 DEFINE_COMPILER;
3158 struct sljit_jump *jump;
3159 
3160 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
     /* TMP1 = ((first & 0x3f) << 6) | (second & 0x3f). */
3161 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3162 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3163 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3164 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3165 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3166 
3167 /* Searching for the first zero. */
     /* Bit 0x800 of TMP1 is bit 0x20 of the first byte (shifted left by 6). */
3168 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3169 jump = JUMP(SLJIT_NOT_ZERO);
3170 /* Two byte sequence. */
3171 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3172 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
3173 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3174 
3175 JUMPHERE(jump);
     /* Clear the length marker bit, then append the six payload bits of the third byte. */
3176 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3177 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3178 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3179 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3180 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3181 
     /* Bit 0x10000 of TMP1 is now bit 0x10 of the first byte (shifted left by 12). */
3182 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
3183 jump = JUMP(SLJIT_NOT_ZERO);
3184 /* Three byte sequence. */
3185 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3186 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
3187 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3188 
3189 /* Four byte sequence. */
3190 JUMPHERE(jump);
3191 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
3192 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
3193 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3194 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
3195 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3196 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3197 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
3198 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3199 }
3200 
/* Emits a reduced variant of the UTF-8 decoding helper that fully decodes
only two- and three-byte sequences (the visible caller selects it when
max < 0x10000). On entry TMP1 holds the first byte and the following bytes
are read from STR_PTR; on return TMP1 holds the value and STR_PTR has been
advanced. No length is returned in TMP2. For a first byte with bit 0x10 set
STR_PTR is advanced by one extra unit and the value left in TMP1 keeps bit
0x10000 set, i.e. it is not a real decode of the character. */
do_utfreadchar16(compiler_common * common)3201 static void do_utfreadchar16(compiler_common *common)
3202 {
3203 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
3204 of the character (>= 0xc0). Return value in TMP1. */
3205 DEFINE_COMPILER;
3206 struct sljit_jump *jump;
3207 
3208 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
     /* TMP1 = ((first & 0x3f) << 6) | (second & 0x3f). */
3209 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3210 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3211 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3212 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3213 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3214 
3215 /* Searching for the first zero. */
3216 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3217 jump = JUMP(SLJIT_NOT_ZERO);
3218 /* Two byte sequence. */
3219 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3220 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3221 
3222 JUMPHERE(jump);
     /* Bit 0x400 of TMP1 is bit 0x10 of the first byte. TMP2 becomes 1 when it
     is set and 0 otherwise, and is added to STR_PTR without a branch. */
3223 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
3224 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
3225 /* This code runs only in 8 bit mode. No need to shift the value. */
3226 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3227 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3228 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3229 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3230 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3231 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3232 /* Three byte sequence. */
3233 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3234 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3235 }
3236 
/* Emits the out-of-line helper that fetches the ctypes[] entry of a
multi-byte UTF-8 character. On entry TMP2 holds the first byte and STR_PTR
addresses the byte after it. On return TMP1 holds the ctypes value, or 0 for
any character whose code point is 256 or more, and STR_PTR has been moved
past the bytes the helper consumed. */
do_utfreadtype8(compiler_common * common)3237 static void do_utfreadtype8(compiler_common *common)
3238 {
3239 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
3240 of the character (>= 0xc0). Return value in TMP1. */
3241 DEFINE_COMPILER;
3242 struct sljit_jump *jump;
3243 struct sljit_jump *compare;
3244 
3245 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3246 
     /* Bit 0x20 set in the first byte means the sequence is longer than two bytes. */
3247 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3248 jump = JUMP(SLJIT_NOT_ZERO);
3249 /* Two byte sequence. */
3250 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3251 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3252 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3253 /* The upper 5 bits are known at this point. */
     /* A payload above 3 in the first byte gives a code point of at least 0x100. */
3254 compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
3255 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3256 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3257 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3258 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3259 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3260 
     /* Two-byte character outside the table: type 0. */
3261 JUMPHERE(compare);
3262 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3263 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3264 
3265 /* We only have types for characters less than 256. */
     /* Longer sequences are skipped using the value stored in utf8_table4 for
     the first byte (presumably the number of additional bytes - confirm
     against the table definition); the type is 0. */
3266 JUMPHERE(jump);
3267 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3268 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3269 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3270 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3271 }
3272 
3273 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
3274 
3275 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
     /* Mask and shift that split a code point into a block number (upper bits)
     and an offset inside a 128-entry block (lower 7 bits). */
3276 #define UCD_BLOCK_MASK 127
3277 #define UCD_BLOCK_SHIFT 7
3278 
/* Emits the out-of-line helper that looks up the Unicode character type of
the code point in TMP1 through the two-stage tables ucd_stage1 / ucd_stage2
and the ucd_records array. The helper is entered and left with the sljit
fast-call mechanism, using RETURN_ADDR for the return address. */
do_getucd(compiler_common * common)3279 static void do_getucd(compiler_common *common)
3280 {
3281 /* Search the UCD record for the character comes in TMP1.
3282 Returns chartype in TMP1 and UCD offset in TMP2. */
3283 DEFINE_COMPILER;
3284 
     /* The shifts used below depend on these two sizes. */
3285 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3286 
3287 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
     /* TMP2 = ucd_stage1 entry for the block number (TMP1 >> 7), read as a byte. */
3288 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3289 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
     /* TMP1 = (stage1 value << 7) + (code point & 127): the index into ucd_stage2. */
3290 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3291 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3292 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
     /* 16-bit load from ucd_stage2; the trailing 1 is presumably the index
     shift of the two-register address (2-byte entries) - confirm in sljit. */
3293 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3294 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
     /* Byte load of the chartype field from ucd_records, indexed by TMP2; the
     trailing 3 is presumably the index shift matching the 8-byte record size
     asserted above. */
3295 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3296 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3297 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3298 }
3299 
3300 #endif /* SUPPORT_UNICODE */
3301 
/* Emits the preamble of the main matching loop and returns the label of the
code that advances STR_PTR by one character. The emitted code:
  - for PCRE2_FIRSTLINE, finds the end of the first line and stores it in
    the stack slot common->match_end_ptr;
  - otherwise, for PCRE2_USE_OFFSET_LIMIT, computes the limit pointer,
    stores it in the same slot, and leaves with PCRE2_ERROR_NOMATCH when the
    limit lies before STR_PTR;
  - then jumps over the "advance" code, so the first pass through the loop
    starts at the current STR_PTR without moving it.
hascrorlf and overall_options decide whether the advance code needs the
extra newline handling described at newlinecheck below. */
mainloop_entry(compiler_common * common,BOOL hascrorlf,sljit_u32 overall_options)3302 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, sljit_u32 overall_options)
3303 {
3304 DEFINE_COMPILER;
3305 struct sljit_label *mainloop;
3306 struct sljit_label *newlinelabel = NULL;
3307 struct sljit_jump *start;
3308 struct sljit_jump *end = NULL;
3309 struct sljit_jump *end2 = NULL;
3310 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3311 struct sljit_jump *singlechar;
3312 #endif
3313 jump_list *newline = NULL;
3314 BOOL newlinecheck = FALSE;
3315 BOOL readuchar = FALSE;
3316 
     /* newlinecheck: the advance code must compare against the two parts of
     common->newline (high byte, then low byte) so that both units are stepped
     over together. Enabled only without hascrorlf and without FIRSTLINE, for
     the ANY / ANYCRLF conventions or a fixed newline value above 255. */
3317 if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
3318     && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3319   newlinecheck = TRUE;
3320 
3321 SLJIT_ASSERT(common->forced_quit_label == NULL);
3322 
3323 if ((overall_options & PCRE2_FIRSTLINE) != 0)
3324   {
3325   /* Search for the end of the first line. */
3326   SLJIT_ASSERT(common->match_end_ptr != 0);
       /* STR_PTR is used for the scan; keep the real position in TMP3. */
3327   OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3328 
3329   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3330     {
         /* Two-unit fixed newline: compare each adjacent pair of code units
         with the high and low byte of common->newline. */
3331     mainloop = LABEL();
3332     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3333     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3334     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3335     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3336     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3337     CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3338     JUMPHERE(end);
         /* STR_PTR is one unit past the candidate position here, hence the -1. */
3339     OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3340     }
3341   else
3342     {
3343     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3344     mainloop = LABEL();
3345     /* Continual stores does not cause data dependency. */
3346     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
3347     read_char_range(common, common->nlmin, common->nlmax, TRUE);
3348     check_newlinechar(common, common->nltype, &newline, TRUE);
3349     CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3350     JUMPHERE(end);
         /* No newline found: the stored end is the final STR_PTR. When a newline
         is found the jumps in "newline" skip this store, keeping the position
         saved before that character was read. */
3351     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
3352     set_jumps(newline, LABEL());
3353     }
3354 
3355   OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3356   }
3357 else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
3358   {
3359   /* Check whether offset limit is set and valid. */
3360   SLJIT_ASSERT(common->match_end_ptr != 0);
3361 
3362   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3363   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
       /* Default result when the limit is PCRE2_UNSET: STR_END. */
3364   OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
3365   end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
3366   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
       /* Scale the limit from code units to bytes. */
3367 #if PCRE2_CODE_UNIT_WIDTH == 16
3368   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3369 #elif PCRE2_CODE_UNIT_WIDTH == 32
3370   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
3371 #endif
       /* TMP2 = begin + limit, clamped to STR_END. */
3372   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3373   OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
3374   end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
3375   OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
3376   JUMPHERE(end2);
       /* A limit that lies before the current position leaves through the
       forced_quit list with PCRE2_ERROR_NOMATCH in the return register. */
3377   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
3378   add_jump(compiler, &common->forced_quit, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
3379   JUMPHERE(end);
3380   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
3381   }
3382 
     /* First entry: skip all of the advance code emitted below. */
3383 start = JUMP(SLJIT_JUMP);
3384 
3385 if (newlinecheck)
3386   {
       /* Reached from the main loop when the current unit equals the high byte
       of common->newline: step over it and, if the next unit equals the low
       byte, step over that one too (branch-free 0/1 flag scaled to bytes). */
3387   newlinelabel = LABEL();
3388   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3389   end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3390   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3391   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3392   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3393 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3394   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3395 #endif
3396   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3397   end2 = JUMP(SLJIT_JUMP);
3398   }
3399 
     /* This is the label handed back to the caller. */
3400 mainloop = LABEL();
3401 
3402 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3403 #ifdef SUPPORT_UNICODE
3404 if (common->utf) readuchar = TRUE;
3405 #endif
3406 if (newlinecheck) readuchar = TRUE;
3407 
     /* The current unit is only loaded when something below inspects it. */
3408 if (readuchar)
3409   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3410 
3411 if (newlinecheck)
3412   CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3413 
3414 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3415 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3416 #if PCRE2_CODE_UNIT_WIDTH == 8
3417 if (common->utf)
3418   {
       /* For a first byte of 0xc0 or above, add the utf8_table4 value for that
       byte to STR_PTR (presumably the number of additional bytes). */
3419   singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3420   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3421   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3422   JUMPHERE(singlechar);
3423   }
3424 #elif PCRE2_CODE_UNIT_WIDTH == 16
3425 if (common->utf)
3426   {
       /* Units in 0xd800..0xdbff are followed by a second unit: skip two more
       bytes in that case, computed from a 0/1 flag shifted left by one. */
3427   singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3428   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3429   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3430   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3431   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3432   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3433   JUMPHERE(singlechar);
3434   }
3435 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
3436 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
     /* Landing point of the initial jump and of both exits of the newline code. */
3437 JUMPHERE(start);
3438 
3439 if (newlinecheck)
3440   {
3441   JUMPHERE(end);
3442   JUMPHERE(end2);
3443   }
3444 
3445 return mainloop;
3446 }
3447 
3448 #define MAX_N_CHARS 16
3449 #define MAX_DIFF_CHARS 6
3450 
add_prefix_char(PCRE2_UCHAR chr,PCRE2_UCHAR * chars)3451 static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, PCRE2_UCHAR *chars)
3452 {
3453 PCRE2_UCHAR i, len;
3454 
3455 len = chars[0];
3456 if (len == 255)
3457   return;
3458 
3459 if (len == 0)
3460   {
3461   chars[0] = 1;
3462   chars[1] = chr;
3463   return;
3464   }
3465 
3466 for (i = len; i > 0; i--)
3467   if (chars[i] == chr)
3468     return;
3469 
3470 if (len >= MAX_DIFF_CHARS - 1)
3471   {
3472   chars[0] = 255;
3473   return;
3474   }
3475 
3476 len++;
3477 chars[len] = chr;
3478 chars[0] = len;
3479 }
3480 
/* Walks the compiled pattern starting at cc and records, for each of the
first max_chars code-unit positions, which code units can appear there.
  chars      per-position tables of MAX_DIFF_CHARS entries each; entry 0 is
             the number of recorded units, or 255 when the position is
             unconstrained (see add_prefix_char()).
  max_chars  number of positions still available.
  rec_count  shared step budget, decremented once per loop iteration; the
             scan stops with a return value of 0 when it reaches zero.
Returns the number of positions recorded by this invocation (consumed),
except for the budget case above. Recursive calls are used for optional
items and alternatives; the value they return replaces max_chars for the
remainder of the scan. */
scan_prefix(compiler_common * common,PCRE2_SPTR cc,PCRE2_UCHAR * chars,int max_chars,sljit_u32 * rec_count)3481 static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *chars, int max_chars, sljit_u32 *rec_count)
3482 {
3483 /* Recursive function, which scans prefix literals. */
3484 BOOL last, any, class, caseless;
3485 int len, repeat, len_save, consumed = 0;
3486 sljit_u32 chr; /* Any unicode character. */
3487 sljit_u8 *bytes, *bytes_end, byte;
3488 PCRE2_SPTR alternative, cc_save, oc;
3489 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
3490 PCRE2_UCHAR othercase[8];
3491 #elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
3492 PCRE2_UCHAR othercase[2];
3493 #else
3494 PCRE2_UCHAR othercase[1];
3495 #endif
3496 
3497 repeat = 1;
3498 while (TRUE)
3499   {
       /* Budget check: one unit of *rec_count per opcode examined. */
3500   if (*rec_count == 0)
3501     return 0;
3502   (*rec_count)--;
3503 
       /* Per-item flags:
          last     - stop scanning after this item has been recorded;
          any      - item matches an unconstrained code unit;
          class    - item is a 32-byte bitmap class;
          caseless - item is a caseless literal. */
3504   last = TRUE;
3505   any = FALSE;
3506   class = FALSE;
3507   caseless = FALSE;
3508 
3509   switch (*cc)
3510     {
3511     case OP_CHARI:
3512     caseless = TRUE;
         /* Fall through. */
3513     case OP_CHAR:
3514     last = FALSE;
3515     cc++;
3516     break;
3517 
3518     case OP_SOD:
3519     case OP_SOM:
3520     case OP_SET_SOM:
3521     case OP_NOT_WORD_BOUNDARY:
3522     case OP_WORD_BOUNDARY:
3523     case OP_EODN:
3524     case OP_EOD:
3525     case OP_CIRC:
3526     case OP_CIRCM:
3527     case OP_DOLL:
3528     case OP_DOLLM:
3529     /* Zero width assertions. */
3530     cc++;
3531     continue;
3532 
         /* Assertion groups consume nothing: skip the whole group. */
3533     case OP_ASSERT:
3534     case OP_ASSERT_NOT:
3535     case OP_ASSERTBACK:
3536     case OP_ASSERTBACK_NOT:
3537     cc = bracketend(cc);
3538     continue;
3539 
         /* One-or-more literal: record one occurrence, then stop (last stays TRUE). */
3540     case OP_PLUSI:
3541     case OP_MINPLUSI:
3542     case OP_POSPLUSI:
3543     caseless = TRUE;
         /* Fall through. */
3544     case OP_PLUS:
3545     case OP_MINPLUS:
3546     case OP_POSPLUS:
3547     cc++;
3548     break;
3549 
         /* Exact repeat of a literal: record it "repeat" times and keep going. */
3550     case OP_EXACTI:
3551     caseless = TRUE;
         /* Fall through. */
3552     case OP_EXACT:
3553     repeat = GET2(cc, 1);
3554     last = FALSE;
3555     cc += 1 + IMM2_SIZE;
3556     break;
3557 
         /* Optional literal: first scan the path where it is absent (starting
         after the literal), then record the literal itself on top of that. */
3558     case OP_QUERYI:
3559     case OP_MINQUERYI:
3560     case OP_POSQUERYI:
3561     caseless = TRUE;
         /* Fall through. */
3562     case OP_QUERY:
3563     case OP_MINQUERY:
3564     case OP_POSQUERY:
3565     len = 1;
3566     cc++;
3567 #ifdef SUPPORT_UNICODE
3568     if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3569 #endif
3570     max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
3571     if (max_chars == 0)
3572       return consumed;
3573     last = FALSE;
3574     break;
3575 
3576     case OP_KET:
3577     cc += 1 + LINK_SIZE;
3578     continue;
3579 
         /* Reaching OP_ALT means the current branch ended: follow its link. */
3580     case OP_ALT:
3581     cc += GET(cc, 1);
3582     continue;
3583 
         /* Groups: scan every alternative after the first recursively into the
         same tables, then continue with the first branch in this invocation. */
3584     case OP_ONCE:
3585     case OP_ONCE_NC:
3586     case OP_BRA:
3587     case OP_BRAPOS:
3588     case OP_CBRA:
3589     case OP_CBRAPOS:
3590     alternative = cc + GET(cc, 1);
3591     while (*alternative == OP_ALT)
3592       {
3593       max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
3594       if (max_chars == 0)
3595         return consumed;
3596       alternative += GET(alternative, 1);
3597       }
3598 
         /* Capturing groups carry an extra IMM2_SIZE field after the link. */
3599     if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3600       cc += IMM2_SIZE;
3601     cc += 1 + LINK_SIZE;
3602     continue;
3603 
3604     case OP_CLASS:
3605 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
3606     if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
3607       return consumed;
3608 #endif
3609     class = TRUE;
3610     break;
3611 
3612     case OP_NCLASS:
3613 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3614     if (common->utf) return consumed;
3615 #endif
3616     class = TRUE;
3617     break;
3618 
3619 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
3620     case OP_XCLASS:
3621 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3622     if (common->utf) return consumed;
3623 #endif
3624     any = TRUE;
3625     cc += GET(cc, 1);
3626     break;
3627 #endif
3628 
         /* Character types are recorded as unconstrained positions. In UTF-8
         mode the scan stops instead when is_char7_bitset() rejects the type's
         bitmap. */
3629     case OP_DIGIT:
3630 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
3631     if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3632       return consumed;
3633 #endif
3634     any = TRUE;
3635     cc++;
3636     break;
3637 
3638     case OP_WHITESPACE:
3639 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
3640     if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3641       return consumed;
3642 #endif
3643     any = TRUE;
3644     cc++;
3645     break;
3646 
3647     case OP_WORDCHAR:
3648 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
3649     if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3650       return consumed;
3651 #endif
3652     any = TRUE;
3653     cc++;
3654     break;
3655 
         /* OP_NOT / OP_NOTI carry one extra unit (skipped by the first cc++);
         the shared tail below skips the opcode itself. */
3656     case OP_NOT:
3657     case OP_NOTI:
3658     cc++;
3659     /* Fall through. */
3660     case OP_NOT_DIGIT:
3661     case OP_NOT_WHITESPACE:
3662     case OP_NOT_WORDCHAR:
3663     case OP_ANY:
3664     case OP_ALLANY:
3665 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3666     if (common->utf) return consumed;
3667 #endif
3668     any = TRUE;
3669     cc++;
3670     break;
3671 
3672 #ifdef SUPPORT_UNICODE
3673     case OP_NOTPROP:
3674     case OP_PROP:
3675 #if PCRE2_CODE_UNIT_WIDTH != 32
3676     if (common->utf) return consumed;
3677 #endif
3678     any = TRUE;
3679     cc += 1 + 2;
3680     break;
3681 #endif
3682 
         /* Only sets the repeat count; the type opcode that follows is handled
         by the next loop iteration. */
3683     case OP_TYPEEXACT:
3684     repeat = GET2(cc, 1);
3685     cc += 1 + IMM2_SIZE;
3686     continue;
3687 
3688     case OP_NOTEXACT:
3689     case OP_NOTEXACTI:
3690 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3691     if (common->utf) return consumed;
3692 #endif
3693     any = TRUE;
3694     repeat = GET2(cc, 1);
3695     cc += 1 + IMM2_SIZE + 1;
3696     break;
3697 
         /* Anything else ends the prefix. */
3698     default:
3699     return consumed;
3700     }
3701 
       /* Unconstrained item: mark "repeat" consecutive positions with 255. */
3702   if (any)
3703     {
3704     do
3705       {
3706       chars[0] = 255;
3707 
3708       consumed++;
3709       if (--max_chars == 0)
3710         return consumed;
3711       chars += MAX_DIFF_CHARS;
3712       }
3713     while (--repeat > 0);
3714 
3715     repeat = 1;
3716     continue;
3717     }
3718 
3719   if (class)
3720     {
         /* The 32-byte bitmap follows the opcode; cc is moved to whatever comes
         after the bitmap (possibly a repeat opcode). */
3721     bytes = (sljit_u8*) (cc + 1);
3722     cc += 1 + 32 / sizeof(PCRE2_UCHAR);
3723 
3724     switch (*cc)
3725       {
           /* The class may match zero times: also scan the path that skips it. */
3726       case OP_CRSTAR:
3727       case OP_CRMINSTAR:
3728       case OP_CRPOSSTAR:
3729       case OP_CRQUERY:
3730       case OP_CRMINQUERY:
3731       case OP_CRPOSQUERY:
3732       max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
3733       if (max_chars == 0)
3734         return consumed;
3735       break;
3736 
3737       default:
3738       case OP_CRPLUS:
3739       case OP_CRMINPLUS:
3740       case OP_CRPOSPLUS:
3741       break;
3742 
           /* Range repeat: the first GET2 value (the minimum) is the repeat count. */
3743       case OP_CRRANGE:
3744       case OP_CRMINRANGE:
3745       case OP_CRPOSRANGE:
3746       repeat = GET2(cc, 1);
3747       if (repeat <= 0)
3748         return consumed;
3749       break;
3750       }
3751 
3752     do
3753       {
           /* Bit for value 255 set: treat the position as unconstrained. */
3754       if (bytes[31] & 0x80)
3755         chars[0] = 255;
3756       else if (chars[0] != 255)
3757         {
             /* Walk the bitmap and add every member; chr tracks the value of
             bit 0 of the current byte, and the walk stops early once the
             position overflows to 255. */
3758         bytes_end = bytes + 32;
3759         chr = 0;
3760         do
3761           {
3762           byte = *bytes++;
3763           SLJIT_ASSERT((chr & 0x7) == 0);
3764           if (byte == 0)
3765             chr += 8;
3766           else
3767             {
3768             do
3769               {
3770               if ((byte & 0x1) != 0)
3771                 add_prefix_char(chr, chars);
3772               byte >>= 1;
3773               chr++;
3774               }
3775             while (byte != 0);
                 /* Round chr up to the start of the next byte's range. */
3776             chr = (chr + 7) & ~7;
3777             }
3778           }
3779         while (chars[0] != 255 && bytes < bytes_end);
             /* Rewind to the start of the bitmap for the next repetition. */
3780         bytes = bytes_end - 32;
3781         }
3782 
3783       consumed++;
3784       if (--max_chars == 0)
3785         return consumed;
3786       chars += MAX_DIFF_CHARS;
3787       }
3788     while (--repeat > 0);
3789 
         /* Decide whether scanning may continue past the class. */
3790     switch (*cc)
3791       {
3792       case OP_CRSTAR:
3793       case OP_CRMINSTAR:
3794       case OP_CRPOSSTAR:
3795       return consumed;
3796 
3797       case OP_CRQUERY:
3798       case OP_CRMINQUERY:
3799       case OP_CRPOSQUERY:
3800       cc++;
3801       break;
3802 
           /* Only a fixed-count range (min == max) lets the scan continue. */
3803       case OP_CRRANGE:
3804       case OP_CRMINRANGE:
3805       case OP_CRPOSRANGE:
3806       if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
3807         return consumed;
3808       cc += 1 + 2 * IMM2_SIZE;
3809       break;
3810       }
3811 
3812     repeat = 1;
3813     continue;
3814     }
3815 
       /* Literal character: len is its length in code units. */
3816   len = 1;
3817 #ifdef SUPPORT_UNICODE
3818   if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3819 #endif
3820 
3821   if (caseless && char_has_othercase(common, cc))
3822     {
3823 #ifdef SUPPORT_UNICODE
3824     if (common->utf)
3825       {
           /* The other case is only usable when it encodes to the same number
           of code units as the original character. */
3826       GETCHAR(chr, cc);
3827       if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3828         return consumed;
3829       }
3830     else
3831 #endif
3832       {
3833       chr = *cc;
3834       othercase[0] = TABLE_GET(chr, common->fcc, chr);
3835       }
3836     }
3837   else
3838     {
3839     caseless = FALSE;
3840     othercase[0] = 0; /* Stops compiler warning - PH */
3841     }
3842 
       /* Record the literal "repeat" times, one position per code unit,
       restarting from the saved pointer and length for every repetition. */
3843   len_save = len;
3844   cc_save = cc;
3845   while (TRUE)
3846     {
3847     oc = othercase;
3848     do
3849       {
3850       chr = *cc;
3851       add_prefix_char(*cc, chars);
3852 
3853       if (caseless)
3854         add_prefix_char(*oc, chars);
3855 
3856       len--;
3857       consumed++;
3858       if (--max_chars == 0)
3859         return consumed;
3860       chars += MAX_DIFF_CHARS;
3861       cc++;
3862       oc++;
3863       }
3864     while (len > 0);
3865 
3866     if (--repeat == 0)
3867       break;
3868 
3869     len = len_save;
3870     cc = cc_save;
3871     }
3872 
3873   repeat = 1;
3874   if (last)
3875     return consumed;
3876   }
3877 }
3878 
3879 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
3880 
character_to_int32(PCRE2_UCHAR chr)3881 static sljit_s32 character_to_int32(PCRE2_UCHAR chr)
3882 {
3883 sljit_s32 value = (sljit_s32)chr;
3884 #if PCRE2_CODE_UNIT_WIDTH == 8
3885 #define SSE2_COMPARE_TYPE_INDEX 0
3886 return (value << 24) | (value << 16) | (value << 8) | value;
3887 #elif PCRE2_CODE_UNIT_WIDTH == 16
3888 #define SSE2_COMPARE_TYPE_INDEX 1
3889 return (value << 16) | value;
3890 #elif PCRE2_CODE_UNIT_WIDTH == 32
3891 #define SSE2_COMPARE_TYPE_INDEX 2
3892 return value;
3893 #else
3894 #error "Unsupported unit width"
3895 #endif
3896 }
3897 
fast_forward_first_char2_sse2(compiler_common * common,PCRE2_UCHAR char1,PCRE2_UCHAR char2)3898 static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
3899 {
3900 DEFINE_COMPILER;
3901 struct sljit_label *start;
3902 struct sljit_jump *quit[3];
3903 struct sljit_jump *nomatch;
3904 sljit_u8 instruction[8];
3905 sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
3906 sljit_s32 tmp2_ind = sljit_get_register_index(TMP2);
3907 sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
3908 BOOL load_twice = FALSE;
3909 PCRE2_UCHAR bit;
3910 
3911 bit = char1 ^ char2;
3912 if (!is_powerof2(bit))
3913   bit = 0;
3914 
3915 if ((char1 != char2) && bit == 0)
3916   load_twice = TRUE;
3917 
3918 quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3919 
3920 /* First part (unaligned start) */
3921 
3922 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
3923 
3924 SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);
3925 
3926 /* MOVD xmm, r/m32 */
3927 instruction[0] = 0x66;
3928 instruction[1] = 0x0f;
3929 instruction[2] = 0x6e;
3930 instruction[3] = 0xc0 | (2 << 3) | tmp1_ind;
3931 sljit_emit_op_custom(compiler, instruction, 4);
3932 
3933 if (char1 != char2)
3934   {
3935   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
3936 
3937   /* MOVD xmm, r/m32 */
3938   instruction[3] = 0xc0 | (3 << 3) | tmp1_ind;
3939   sljit_emit_op_custom(compiler, instruction, 4);
3940   }
3941 
3942 /* PSHUFD xmm1, xmm2/m128, imm8 */
3943 instruction[2] = 0x70;
3944 instruction[3] = 0xc0 | (2 << 3) | 2;
3945 instruction[4] = 0;
3946 sljit_emit_op_custom(compiler, instruction, 5);
3947 
3948 if (char1 != char2)
3949   {
3950   /* PSHUFD xmm1, xmm2/m128, imm8 */
3951   instruction[3] = 0xc0 | (3 << 3) | 3;
3952   instruction[4] = 0;
3953   sljit_emit_op_custom(compiler, instruction, 5);
3954   }
3955 
3956 OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf);
3957 OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
3958 
3959 /* MOVDQA xmm1, xmm2/m128 */
3960 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3961 
3962 if (str_ptr_ind < 8)
3963   {
3964   instruction[2] = 0x6f;
3965   instruction[3] = (0 << 3) | str_ptr_ind;
3966   sljit_emit_op_custom(compiler, instruction, 4);
3967 
3968   if (load_twice)
3969     {
3970     instruction[3] = (1 << 3) | str_ptr_ind;
3971     sljit_emit_op_custom(compiler, instruction, 4);
3972     }
3973   }
3974 else
3975   {
3976   instruction[1] = 0x41;
3977   instruction[2] = 0x0f;
3978   instruction[3] = 0x6f;
3979   instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
3980   sljit_emit_op_custom(compiler, instruction, 5);
3981 
3982   if (load_twice)
3983     {
3984     instruction[4] = (1 << 3) | str_ptr_ind;
3985     sljit_emit_op_custom(compiler, instruction, 5);
3986     }
3987   instruction[1] = 0x0f;
3988   }
3989 
3990 #else
3991 
3992 instruction[2] = 0x6f;
3993 instruction[3] = (0 << 3) | str_ptr_ind;
3994 sljit_emit_op_custom(compiler, instruction, 4);
3995 
3996 if (load_twice)
3997   {
3998   instruction[3] = (1 << 3) | str_ptr_ind;
3999   sljit_emit_op_custom(compiler, instruction, 4);
4000   }
4001 
4002 #endif
4003 
4004 if (bit != 0)
4005   {
4006   /* POR xmm1, xmm2/m128 */
4007   instruction[2] = 0xeb;
4008   instruction[3] = 0xc0 | (0 << 3) | 3;
4009   sljit_emit_op_custom(compiler, instruction, 4);
4010   }
4011 
4012 /* PCMPEQB/W/D xmm1, xmm2/m128 */
4013 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
4014 instruction[3] = 0xc0 | (0 << 3) | 2;
4015 sljit_emit_op_custom(compiler, instruction, 4);
4016 
4017 if (load_twice)
4018   {
4019   instruction[3] = 0xc0 | (1 << 3) | 3;
4020   sljit_emit_op_custom(compiler, instruction, 4);
4021   }
4022 
4023 /* PMOVMSKB reg, xmm */
4024 instruction[2] = 0xd7;
4025 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
4026 sljit_emit_op_custom(compiler, instruction, 4);
4027 
4028 if (load_twice)
4029   {
4030   OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
4031   instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
4032   sljit_emit_op_custom(compiler, instruction, 4);
4033 
4034   OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4035   OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
4036   }
4037 
4038 OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0);
4039 
4040 /* BSF r32, r/m32 */
4041 instruction[0] = 0x0f;
4042 instruction[1] = 0xbc;
4043 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
4044 sljit_emit_op_custom(compiler, instruction, 3);
4045 
4046 nomatch = JUMP(SLJIT_ZERO);
4047 
4048 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4049 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4050 quit[1] = JUMP(SLJIT_JUMP);
4051 
4052 JUMPHERE(nomatch);
4053 
4054 start = LABEL();
4055 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
4056 quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4057 
4058 /* Second part (aligned) */
4059 
4060 instruction[0] = 0x66;
4061 instruction[1] = 0x0f;
4062 
4063 /* MOVDQA xmm1, xmm2/m128 */
4064 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4065 
4066 if (str_ptr_ind < 8)
4067   {
4068   instruction[2] = 0x6f;
4069   instruction[3] = (0 << 3) | str_ptr_ind;
4070   sljit_emit_op_custom(compiler, instruction, 4);
4071 
4072   if (load_twice)
4073     {
4074     instruction[3] = (1 << 3) | str_ptr_ind;
4075     sljit_emit_op_custom(compiler, instruction, 4);
4076     }
4077   }
4078 else
4079   {
4080   instruction[1] = 0x41;
4081   instruction[2] = 0x0f;
4082   instruction[3] = 0x6f;
4083   instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
4084   sljit_emit_op_custom(compiler, instruction, 5);
4085 
4086   if (load_twice)
4087     {
4088     instruction[4] = (1 << 3) | str_ptr_ind;
4089     sljit_emit_op_custom(compiler, instruction, 5);
4090     }
4091   instruction[1] = 0x0f;
4092   }
4093 
4094 #else
4095 
4096 instruction[2] = 0x6f;
4097 instruction[3] = (0 << 3) | str_ptr_ind;
4098 sljit_emit_op_custom(compiler, instruction, 4);
4099 
4100 if (load_twice)
4101   {
4102   instruction[3] = (1 << 3) | str_ptr_ind;
4103   sljit_emit_op_custom(compiler, instruction, 4);
4104   }
4105 
4106 #endif
4107 
4108 if (bit != 0)
4109   {
4110   /* POR xmm1, xmm2/m128 */
4111   instruction[2] = 0xeb;
4112   instruction[3] = 0xc0 | (0 << 3) | 3;
4113   sljit_emit_op_custom(compiler, instruction, 4);
4114   }
4115 
4116 /* PCMPEQB/W/D xmm1, xmm2/m128 */
4117 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
4118 instruction[3] = 0xc0 | (0 << 3) | 2;
4119 sljit_emit_op_custom(compiler, instruction, 4);
4120 
4121 if (load_twice)
4122   {
4123   instruction[3] = 0xc0 | (1 << 3) | 3;
4124   sljit_emit_op_custom(compiler, instruction, 4);
4125   }
4126 
4127 /* PMOVMSKB reg, xmm */
4128 instruction[2] = 0xd7;
4129 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
4130 sljit_emit_op_custom(compiler, instruction, 4);
4131 
4132 if (load_twice)
4133   {
4134   instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
4135   sljit_emit_op_custom(compiler, instruction, 4);
4136 
4137   OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4138   }
4139 
4140 /* BSF r32, r/m32 */
4141 instruction[0] = 0x0f;
4142 instruction[1] = 0xbc;
4143 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
4144 sljit_emit_op_custom(compiler, instruction, 3);
4145 
4146 JUMPTO(SLJIT_ZERO, start);
4147 
4148 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4149 
4150 start = LABEL();
4151 SET_LABEL(quit[0], start);
4152 SET_LABEL(quit[1], start);
4153 SET_LABEL(quit[2], start);
4154 }
4155 
4156 #undef SSE2_COMPARE_TYPE_INDEX
4157 
4158 #endif
4159 
/* Emits code that moves STR_PTR forward until the code unit at
STR_PTR + offset equals char1 or char2 (pass the same value twice for a single
character).

While searching, STR_PTR is temporarily advanced by offset code units and is
moved back before the emitted code finishes. When common->match_end_ptr is in
use, STR_END is temporarily lowered to min(STR_END, match end + offset + 1);
the original STR_END is kept in TMP3 and restored afterwards. In UTF mode with
a non-zero offset, a hit whose start position (the unit at -offset) is a
continuation unit / low surrogate is rejected and the search resumes one unit
further on.

On x86 with SSE2 available the search itself is delegated to
fast_forward_first_char2_sse2(). */

static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
struct sljit_jump *found;
PCRE2_UCHAR mask;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *utf_start = NULL;
struct sljit_jump *utf_quit = NULL;
#endif
BOOL has_match_end = (common->match_end_ptr != 0);

if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (has_match_end)
  {
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  /* STR_END = min(saved STR_END, match end + offset + 1). Exactly one of the
  two clamping sequences is emitted. */
  OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, SLJIT_IMM, IN_UCHARS(offset + 1));
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
  if (sljit_x86_is_cmov_available())
    {
    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
    sljit_x86_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
    }
  else
#endif
    {
    quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP3, 0);
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
    JUMPHERE(quit);
    }
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  utf_start = LABEL();
#endif

#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)

/* SSE2 accelerated first character search. */

if (sljit_x86_is_sse2_available())
  {
  fast_forward_first_char2_sse2(common, char1, char2);

  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);
  if (common->mode == PCRE2_JIT_COMPLETE)
    {
    /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
    SLJIT_ASSERT(common->forced_quit_label == NULL);
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
    add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf && offset > 0)
      {
      SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);

      /* Reject a hit whose start position is in the middle of a character:
      step one unit forward and search again. */
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if PCRE2_CODE_UNIT_WIDTH == 8
      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
#elif PCRE2_CODE_UNIT_WIDTH == 16
      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
#else
#error "Unknown code width"
#endif
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      }
#endif

    if (offset > 0)
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
    }
  else if (sljit_x86_is_cmov_available())
    {
    /* The 16-byte scan may overshoot: pull STR_PTR back to the end. */
    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
    sljit_x86_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, has_match_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_match_end ? common->match_end_ptr : 0);
    }
  else
    {
    quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, has_match_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_match_end ? common->match_end_ptr : 0);
    JUMPHERE(quit);
    }

  if (has_match_end)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

/* Generic one-unit-at-a-time search. */

quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

start = LABEL();
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);

if (char1 == char2)
  found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
else
  {
  mask = char1 ^ char2;
  if (is_powerof2(mask))
    {
    /* The characters differ in one bit: set it and compare once. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
    found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask);
    }
  else
    {
    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1);
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2);
    OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
    found = JUMP(SLJIT_NOT_ZERO);
    }
  }

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, start);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  utf_quit = JUMP(SLJIT_JUMP);
#endif

JUMPHERE(found);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  /* Same mid-character rejection as in the SSE2 path above. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if PCRE2_CODE_UNIT_WIDTH == 8
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
#elif PCRE2_CODE_UNIT_WIDTH == 16
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
#else
#error "Unknown code width"
#endif
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(utf_quit);
  }
#endif

JUMPHERE(quit);

if (has_match_end)
  {
  quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  if (offset > 0)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
  JUMPHERE(quit);
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  }

if (offset > 0)
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
}
4327 
fast_forward_first_n_chars(compiler_common * common)4328 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
4329 {
4330 DEFINE_COMPILER;
4331 struct sljit_label *start;
4332 struct sljit_jump *quit;
4333 struct sljit_jump *match;
4334 /* bytes[0] represent the number of characters between 0
4335 and MAX_N_BYTES - 1, 255 represents any character. */
4336 PCRE2_UCHAR chars[MAX_N_CHARS * MAX_DIFF_CHARS];
4337 sljit_s32 offset;
4338 PCRE2_UCHAR mask;
4339 PCRE2_UCHAR *char_set, *char_set_end;
4340 int i, max, from;
4341 int range_right = -1, range_len;
4342 sljit_u8 *update_table = NULL;
4343 BOOL in_range;
4344 sljit_u32 rec_count;
4345 
4346 for (i = 0; i < MAX_N_CHARS; i++)
4347   chars[i * MAX_DIFF_CHARS] = 0;
4348 
4349 rec_count = 10000;
4350 max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
4351 
4352 if (max < 1)
4353   return FALSE;
4354 
4355 in_range = FALSE;
4356 /* Prevent compiler "uninitialized" warning */
4357 from = 0;
4358 range_len = 4 /* minimum length */ - 1;
4359 for (i = 0; i <= max; i++)
4360   {
4361   if (in_range && (i - from) > range_len && (chars[(i - 1) * MAX_DIFF_CHARS] < 255))
4362     {
4363     range_len = i - from;
4364     range_right = i - 1;
4365     }
4366 
4367   if (i < max && chars[i * MAX_DIFF_CHARS] < 255)
4368     {
4369     SLJIT_ASSERT(chars[i * MAX_DIFF_CHARS] > 0);
4370     if (!in_range)
4371       {
4372       in_range = TRUE;
4373       from = i;
4374       }
4375     }
4376   else
4377     in_range = FALSE;
4378   }
4379 
4380 if (range_right >= 0)
4381   {
4382   update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
4383   if (update_table == NULL)
4384     return TRUE;
4385   memset(update_table, IN_UCHARS(range_len), 256);
4386 
4387   for (i = 0; i < range_len; i++)
4388     {
4389     char_set = chars + ((range_right - i) * MAX_DIFF_CHARS);
4390     SLJIT_ASSERT(char_set[0] > 0 && char_set[0] < 255);
4391     char_set_end = char_set + char_set[0];
4392     char_set++;
4393     while (char_set <= char_set_end)
4394       {
4395       if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
4396         update_table[(*char_set) & 0xff] = IN_UCHARS(i);
4397       char_set++;
4398       }
4399     }
4400   }
4401 
4402 offset = -1;
4403 /* Scan forward. */
4404 for (i = 0; i < max; i++)
4405   {
4406   if (offset == -1)
4407     {
4408     if (chars[i * MAX_DIFF_CHARS] <= 2)
4409       offset = i;
4410     }
4411   else if (chars[offset * MAX_DIFF_CHARS] == 2 && chars[i * MAX_DIFF_CHARS] <= 2)
4412     {
4413     if (chars[i * MAX_DIFF_CHARS] == 1)
4414       offset = i;
4415     else
4416       {
4417       mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
4418       if (!is_powerof2(mask))
4419         {
4420         mask = chars[i * MAX_DIFF_CHARS + 1] ^ chars[i * MAX_DIFF_CHARS + 2];
4421         if (is_powerof2(mask))
4422           offset = i;
4423         }
4424       }
4425     }
4426   }
4427 
4428 if (range_right < 0)
4429   {
4430   if (offset < 0)
4431     return FALSE;
4432   SLJIT_ASSERT(chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2);
4433   /* Works regardless the value is 1 or 2. */
4434   mask = chars[offset * MAX_DIFF_CHARS + chars[offset * MAX_DIFF_CHARS]];
4435   fast_forward_first_char2(common, chars[offset * MAX_DIFF_CHARS + 1], mask, offset);
4436   return TRUE;
4437   }
4438 
4439 if (range_right == offset)
4440   offset = -1;
4441 
4442 SLJIT_ASSERT(offset == -1 || (chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2));
4443 
4444 max -= 1;
4445 SLJIT_ASSERT(max > 0);
4446 if (common->match_end_ptr != 0)
4447   {
4448   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4449   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4450   OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4451   quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
4452   OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
4453   JUMPHERE(quit);
4454   }
4455 else
4456   OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4457 
4458 SLJIT_ASSERT(range_right >= 0);
4459 
4460 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4461 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
4462 #endif
4463 
4464 start = LABEL();
4465 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4466 
4467 #if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
4468 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
4469 #else
4470 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
4471 #endif
4472 
4473 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4474 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
4475 #else
4476 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
4477 #endif
4478 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4479 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
4480 
4481 if (offset >= 0)
4482   {
4483   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
4484   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4485 
4486   if (chars[offset * MAX_DIFF_CHARS] == 1)
4487     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1], start);
4488   else
4489     {
4490     mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
4491     if (is_powerof2(mask))
4492       {
4493       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
4494       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1] | mask, start);
4495       }
4496     else
4497       {
4498       match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1]);
4499       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 2], start);
4500       JUMPHERE(match);
4501       }
4502     }
4503   }
4504 
4505 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4506 if (common->utf && offset != 0)
4507   {
4508   if (offset < 0)
4509     {
4510     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4511     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4512     }
4513   else
4514     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4515 #if PCRE2_CODE_UNIT_WIDTH == 8
4516   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4517   CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, start);
4518 #elif PCRE2_CODE_UNIT_WIDTH == 16
4519   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4520   CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, start);
4521 #else
4522 #error "Unknown code width"
4523 #endif
4524   if (offset < 0)
4525     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4526   }
4527 #endif
4528 
4529 if (offset >= 0)
4530   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4531 
4532 JUMPHERE(quit);
4533 
4534 if (common->match_end_ptr != 0)
4535   {
4536   if (range_right >= 0)
4537     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4538   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4539   if (range_right >= 0)
4540     {
4541     quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4542     OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
4543     JUMPHERE(quit);
4544     }
4545   }
4546 else
4547   OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4548 return TRUE;
4549 }
4550 
4551 #undef MAX_N_CHARS
4552 
fast_forward_first_char(compiler_common * common,PCRE2_UCHAR first_char,BOOL caseless)4553 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, PCRE2_UCHAR first_char, BOOL caseless)
4554 {
4555 PCRE2_UCHAR oc;
4556 
4557 oc = first_char;
4558 if (caseless)
4559   {
4560   oc = TABLE_GET(first_char, common->fcc, first_char);
4561 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
4562   if (first_char > 127 && common->utf)
4563     oc = UCD_OTHERCASE(first_char);
4564 #endif
4565   }
4566 
4567 fast_forward_first_char2(common, first_char, oc, 0);
4568 }
4569 
fast_forward_newline(compiler_common * common)4570 static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
4571 {
4572 DEFINE_COMPILER;
4573 struct sljit_label *loop;
4574 struct sljit_jump *lastchar;
4575 struct sljit_jump *firstchar;
4576 struct sljit_jump *quit;
4577 struct sljit_jump *foundcr = NULL;
4578 struct sljit_jump *notfoundnl;
4579 jump_list *newline = NULL;
4580 
4581 if (common->match_end_ptr != 0)
4582   {
4583   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4584   OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4585   }
4586 
4587 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4588   {
4589   lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4590   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4591   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4592   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4593   firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4594 
4595   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
4596   OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
4597   OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
4598 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
4599   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
4600 #endif
4601   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4602 
4603   loop = LABEL();
4604   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4605   quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4606   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4607   OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4608   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
4609   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
4610 
4611   JUMPHERE(quit);
4612   JUMPHERE(firstchar);
4613   JUMPHERE(lastchar);
4614 
4615   if (common->match_end_ptr != 0)
4616     OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4617   return;
4618   }
4619 
4620 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4621 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4622 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4623 skip_char_back(common);
4624 
4625 loop = LABEL();
4626 common->ff_newline_shortcut = loop;
4627 
4628 read_char_range(common, common->nlmin, common->nlmax, TRUE);
4629 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4630 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4631   foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4632 check_newlinechar(common, common->nltype, &newline, FALSE);
4633 set_jumps(newline, loop);
4634 
4635 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4636   {
4637   quit = JUMP(SLJIT_JUMP);
4638   JUMPHERE(foundcr);
4639   notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4640   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4641   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
4642   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4643 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
4644   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4645 #endif
4646   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4647   JUMPHERE(notfoundnl);
4648   JUMPHERE(quit);
4649   }
4650 JUMPHERE(lastchar);
4651 JUMPHERE(firstchar);
4652 
4653 if (common->match_end_ptr != 0)
4654   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4655 }
4656 
4657 static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
4658 
fast_forward_start_bits(compiler_common * common,const sljit_u8 * start_bits)4659 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, const sljit_u8 *start_bits)
4660 {
4661 DEFINE_COMPILER;
4662 struct sljit_label *start;
4663 struct sljit_jump *quit;
4664 struct sljit_jump *found = NULL;
4665 jump_list *matches = NULL;
4666 #if PCRE2_CODE_UNIT_WIDTH != 8
4667 struct sljit_jump *jump;
4668 #endif
4669 
4670 if (common->match_end_ptr != 0)
4671   {
4672   OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
4673   OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4674   }
4675 
4676 start = LABEL();
4677 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4678 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4679 #ifdef SUPPORT_UNICODE
4680 if (common->utf)
4681   OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4682 #endif
4683 
4684 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
4685   {
4686 #if PCRE2_CODE_UNIT_WIDTH != 8
4687   jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
4688   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
4689   JUMPHERE(jump);
4690 #endif
4691   OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4692   OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4693   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
4694   OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4695   OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4696   found = JUMP(SLJIT_NOT_ZERO);
4697   }
4698 
4699 #ifdef SUPPORT_UNICODE
4700 if (common->utf)
4701   OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4702 #endif
4703 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4704 #ifdef SUPPORT_UNICODE
4705 #if PCRE2_CODE_UNIT_WIDTH == 8
4706 if (common->utf)
4707   {
4708   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
4709   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4710   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4711   }
4712 #elif PCRE2_CODE_UNIT_WIDTH == 16
4713 if (common->utf)
4714   {
4715   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
4716   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4717   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4718   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4719   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4720   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4721   }
4722 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
4723 #endif /* SUPPORT_UNICODE */
4724 JUMPTO(SLJIT_JUMP, start);
4725 if (found != NULL)
4726   JUMPHERE(found);
4727 if (matches != NULL)
4728   set_jumps(matches, LABEL());
4729 JUMPHERE(quit);
4730 
4731 if (common->match_end_ptr != 0)
4732   OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4733 }
4734 
search_requested_char(compiler_common * common,PCRE2_UCHAR req_char,BOOL caseless,BOOL has_firstchar)4735 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
4736 {
4737 DEFINE_COMPILER;
4738 struct sljit_label *loop;
4739 struct sljit_jump *toolong;
4740 struct sljit_jump *alreadyfound;
4741 struct sljit_jump *found;
4742 struct sljit_jump *foundoc = NULL;
4743 struct sljit_jump *notfound;
4744 sljit_u32 oc, bit;
4745 
4746 SLJIT_ASSERT(common->req_char_ptr != 0);
4747 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4748 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_CU_MAX);
4749 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4750 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4751 
4752 if (has_firstchar)
4753   OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4754 else
4755   OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4756 
4757 loop = LABEL();
4758 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4759 
4760 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4761 oc = req_char;
4762 if (caseless)
4763   {
4764   oc = TABLE_GET(req_char, common->fcc, req_char);
4765 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
4766   if (req_char > 127 && common->utf)
4767     oc = UCD_OTHERCASE(req_char);
4768 #endif
4769   }
4770 if (req_char == oc)
4771   found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4772 else
4773   {
4774   bit = req_char ^ oc;
4775   if (is_powerof2(bit))
4776     {
4777     OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4778     found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4779     }
4780   else
4781     {
4782     found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4783     foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4784     }
4785   }
4786 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4787 JUMPTO(SLJIT_JUMP, loop);
4788 
4789 JUMPHERE(found);
4790 if (foundoc)
4791   JUMPHERE(foundoc);
4792 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4793 JUMPHERE(alreadyfound);
4794 JUMPHERE(toolong);
4795 return notfound;
4796 }
4797 
/* Emits the fast-call helper that walks frame records starting at STACK_TOP
and writes their saved values back relative to the local base (TMP3). The
first word of each record selects its kind:
  > 0  offset of a two-word slot; two saved words follow (3-word record)
  = 0  end marker; the helper returns through RETURN_ADDR
  < 0  negated offset of a one-word slot; one saved word follows (2-word
       record)
TMP1 is the walking pointer, TMP2 holds the current record header. */

static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *not_positive;
struct sljit_label *next_record;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
GET_LOCAL_BASE(TMP3, 0, 0);

/* Drop frames until we reach STACK_TOP. */
next_record = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
not_positive = JUMP(SLJIT_SIG_LESS_EQUAL);

/* Positive header: restore two words. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, next_record);

JUMPHERE(not_positive);
/* The flags of the comparison above are still valid here. */
not_positive = JUMP(SLJIT_SIG_LESS);
/* End of dropping frames. */
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Negative header: restore one word. */
JUMPHERE(not_positive);
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, next_record);
}
4832 
/* Emits the shared word-boundary helper. The return address is kept in the
   LOCALS0 stack slot for the duration of the helper.

   The generated code computes a 0/1 "is a word character" value twice:
     - for the character before STR_PTR, stored in LOCALS1 (stays 0 when
       STR_PTR is not above jit_arguments.begin);
     - for the character at STR_PTR, left in TMP2 (stays 0 when the jumps
       collected by check_str_end are taken).
   It finishes by XOR-ing the two values with SLJIT_SET_E, so the flags tell
   the caller whether they differ. TMP1 and TMP2 are overwritten.

   With common->use_ucp a character counts as a word character when it is
   CHAR_UNDERSCORE or when the value left in TMP1 by the getucd helper lies in
   [ucp_Ll, ucp_Lu] or [ucp_Nd, ucp_No]. Otherwise the ctype_word bit of the
   common->ctypes table entry is used, and code points above 255 (possible in
   non-8-bit builds or in 8-bit UTF mode) are treated as non-word. */
check_wordboundary(compiler_common * common)4833 static void check_wordboundary(compiler_common *common)
4834 {
4835 DEFINE_COMPILER;
4836 struct sljit_jump *skipread;
4837 jump_list *skipread_list = NULL;
4838 #if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
4839 struct sljit_jump *jump;
4840 #endif
4841 
     /* The table lookups below shift right by 4, which is only correct while
        ctype_word is bit 4. */
4842 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4843 
4844 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4845 /* Get type of the previous char, and put it to LOCALS1. */
4846 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4847 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4848 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
     /* No previous character at or before `begin`: keep the 0 stored above. */
4849 skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4850 skip_char_back(common);
4851 check_start_used_ptr(common);
4852 read_char(common);
4853 
4854 /* Testing char type. */
4855 #ifdef SUPPORT_UNICODE
4856 if (common->use_ucp)
4857   {
       /* TMP2 = 1 is the answer for an underscore, which skips the lookup. */
4858   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4859   jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4860   add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
       /* Two unsigned range checks on the value returned in TMP1:
          [ucp_Ll, ucp_Lu] first, then [ucp_Nd, ucp_No]. */
4861   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4862   OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4863   OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4864   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4865   OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4866   OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4867   JUMPHERE(jump);
4868   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
4869   }
4870 else
4871 #endif
4872   {
4873 #if PCRE2_CODE_UNIT_WIDTH != 8
4874   jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4875 #elif defined SUPPORT_UNICODE
4876   /* Here LOCALS1 has already been zeroed. */
4877   jump = NULL;
4878   if (common->utf)
4879     jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4880 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
       /* LOCALS1 = bit 4 (ctype_word) of the ctypes entry for the character. */
4881   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4882   OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4883   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4884   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
4885 #if PCRE2_CODE_UNIT_WIDTH != 8
4886   JUMPHERE(jump);
4887 #elif defined SUPPORT_UNICODE
4888   if (jump != NULL)
4889     JUMPHERE(jump);
4890 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
4891   }
4892 JUMPHERE(skipread);
4893 
     /* Now the character at STR_PTR. TMP2 = 0 is the answer when the jumps
        collected in skipread_list by check_str_end are taken. */
4894 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4895 check_str_end(common, &skipread_list);
4896 peek_char(common, READ_CHAR_MAX);
4897 
4898 /* Testing char type. This is a code duplication. */
4899 #ifdef SUPPORT_UNICODE
4900 if (common->use_ucp)
4901   {
4902   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4903   jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4904   add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4905   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4906   OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4907   OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4908   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4909   OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4910   OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4911   JUMPHERE(jump);
4912   }
4913 else
4914 #endif
4915   {
4916 #if PCRE2_CODE_UNIT_WIDTH != 8
4917   /* TMP2 may be destroyed by peek_char. */
4918   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4919   jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4920 #elif defined SUPPORT_UNICODE
4921   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4922   jump = NULL;
4923   if (common->utf)
4924     jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4925 #endif
4926   OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4927   OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4928   OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4929 #if PCRE2_CODE_UNIT_WIDTH != 8
4930   JUMPHERE(jump);
4931 #elif defined SUPPORT_UNICODE
4932   if (jump != NULL)
4933     JUMPHERE(jump);
4934 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
4935   }
4936 set_jumps(skipread_list, LABEL());
4937 
     /* Flags from (current XOR previous): the result is non-zero exactly when
        one side is a word character and the other is not. */
4938 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4939 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4940 }
4941 
/* Tries to replace a 256-bit class bitmap test on TMP1 by a few compares.

   The bitmap is scanned for positions where the bit value changes; these
   "edges" describe the class as alternating runs. Depending on the final run
   and on nclass, 256 is appended as a closing edge. When there are more edges
   than can be handled, FALSE is returned and nothing has been emitted, so
   the caller must fall back to a bitmap lookup. Otherwise the compare
   sequence is emitted and TRUE is returned.

   Arguments:
     common      compiler state
     bits        the class bitmap (bit i of the map describes code point i)
     nclass      selects whether a closing edge at 256 is added (see above)
     invert      flips the sense of the emitted tests
     backtracks  list receiving the emitted jumps

   With invert FALSE the emitted jumps are taken for characters whose bit is
   clear; with invert TRUE they are taken for characters whose bit is set.
   The emitted code may destroy TMP1. */
static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
DEFINE_COMPILER;
int edges[MAX_RANGE_SIZE];
int count = 0;
int pos = 0;
int delta, bias;
sljit_u8 state = bits[0] & 0x1;
/* 0x00 or 0xff: the value of a map byte that holds only `state` bits. */
sljit_u8 uniform = -state;
sljit_u8 current;

/* Collect the edges. A whole byte is skipped at once when it cannot contain
a change. */
while (pos < 256)
  {
  if ((pos & 0x7) == 0 && bits[pos >> 3] == uniform)
    {
    pos += 8;
    continue;
    }

  current = (bits[pos >> 3] >> (pos & 0x7)) & 0x1;
  if (current != state)
    {
    if (count >= MAX_RANGE_SIZE)
      return FALSE;
    edges[count++] = pos;
    state = current;
    uniform = -current;
    }
  pos++;
  }

/* Optional closing edge at 256. */
if (state == 0 ? nclass : !nclass)
  {
  if (count >= MAX_RANGE_SIZE)
    return FALSE;
  edges[count++] = 256;
  }

if (count > 4)
  return FALSE;

/* From here on `state` is the (possibly inverted) value of the first run. */
state = bits[0] & 0x1;
if (invert)
  state ^= 0x1;

if (count == 0)
  {
  /* A single run: either no character is accepted (always jump), or every
  character is accepted (nothing to emit). */
  if (state == 0)
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  return TRUE;
  }

if (count == 1)
  {
  add_jump(compiler, backtracks, CMP(state == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[0]));
  return TRUE;
  }

if (count == 2)
  {
  if (edges[0] + 1 == edges[1])
    add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, edges[0]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[0]);
    add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]));
    }
  return TRUE;
  }

if (count == 3)
  {
  if (state != 0)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[2]));
    if (edges[0] + 1 == edges[1])
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[0]));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]));
      }
    return TRUE;
    }

  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[0]));
  if (edges[1] + 1 == edges[2])
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[1]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[1]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[2] - edges[1]));
    }
  return TRUE;
  }

/* Only the four-edge case is left. */
SLJIT_ASSERT(count == 4);
delta = edges[2] - edges[0];

/* Two runs of equal length whose start positions differ in exactly one bit:
setting that bit in TMP1 folds the first run onto the second, so one range
test is enough. */
if ((edges[1] - edges[0]) == (edges[3] - edges[2])
    && (edges[0] | delta) == edges[2]
    && (edges[1] & delta) == 0
    && is_powerof2(delta))
  {
  SLJIT_ASSERT((edges[0] & delta) == 0 && (edges[2] & edges[3] & delta) != 0);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, delta);
  if (edges[2] + 1 == edges[3])
    add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, edges[2]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[2]);
    add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[3] - edges[2]));
    }
  return TRUE;
  }

if (state != 0)
  {
  /* `bias` is the amount already subtracted from TMP1 by the first test. */
  bias = 0;
  if (edges[0] + 1 == edges[1])
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[0]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]));
    bias = edges[0];
    }

  if (edges[2] + 1 == edges[3])
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[2] - bias));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[2] - bias);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[3] - edges[2]));
    }
  return TRUE;
  }

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[0]);
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[3] - edges[0]));
if (edges[1] + 1 == edges[2])
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]));
else
  {
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]);
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[2] - edges[1]));
  }
return TRUE;
}
5093 
/* Emits the shared helper that tests the character in TMP1 against the
   newline code points 0x0a..0x0d and 0x85, plus 0x2028 and 0x2029 when the
   build uses 16/32-bit code units, or uses 8-bit units with Unicode support
   and common->utf is set.

   The helper uses RETURN_ADDR for its fast enter/return. TMP2 receives the
   accumulated result, and the final OP_FLAGS also sets the flags from it.
   TMP1 is modified: 0x0a is subtracted from it and, in the wide/UTF path,
   bit 0 is set. */
check_anynewline(compiler_common * common)5094 static void check_anynewline(compiler_common *common)
5095 {
5096 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
5097 DEFINE_COMPILER;
5098 
5099 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5100 
     /* After rebasing to 0x0a, one unsigned compare covers 0x0a..0x0d. */
5101 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
5102 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
5103 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5104 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
5105 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5106 #if PCRE2_CODE_UNIT_WIDTH == 8
5107 if (common->utf)
5108   {
5109 #endif
5110   OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
       /* Setting bit 0 maps 0x2028 onto 0x2029, so one compare tests both. */
5111   OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
5112   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
5113 #if PCRE2_CODE_UNIT_WIDTH == 8
5114   }
5115 #endif
5116 #endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
     /* Fold the last pending comparison into TMP2 and set the flags from it. */
5117 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5118 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5119 }
5120 
/* Emits the shared helper that tests the character in TMP1 against the
   horizontal space code points 0x09, 0x20 and 0xa0. When the build uses
   16/32-bit code units, or uses 8-bit units with Unicode support and
   common->utf is set, it also tests 0x1680, 0x180e, 0x2000..0x200A, 0x202f,
   0x205f and 0x3000.

   The helper uses RETURN_ADDR for its fast enter/return. TMP2 receives the
   accumulated result, and the final OP_FLAGS also sets the flags from it.
   In the wide/UTF path TMP1 is modified (0x2000 is subtracted from it). */
check_hspace(compiler_common * common)5121 static void check_hspace(compiler_common *common)
5122 {
5123 /* Check whether TMP1 contains a horizontal space character. TMP2 receives the result. */
5124 DEFINE_COMPILER;
5125 
5126 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5127 
5128 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
5129 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5130 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
5131 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5132 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
5133 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5134 #if PCRE2_CODE_UNIT_WIDTH == 8
5135 if (common->utf)
5136   {
5137 #endif
5138   OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5139   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
5140   OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5141   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
5142   OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
       /* Rebase to 0x2000 so that one unsigned compare covers 0x2000..0x200A;
          the remaining constants are expressed relative to the same base. */
5143   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
5144   OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
5145   OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5146   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
5147   OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5148   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
5149   OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5150   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
5151 #if PCRE2_CODE_UNIT_WIDTH == 8
5152   }
5153 #endif
5154 #endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
     /* Fold the last pending comparison into TMP2 and set the flags from it. */
5155 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5156 
5157 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5158 }
5159 
/* Emits the shared helper that tests the character in TMP1 against the
   vertical space code points 0x0a..0x0d and 0x85, plus 0x2028 and 0x2029
   when the build uses 16/32-bit code units, or uses 8-bit units with Unicode
   support and common->utf is set.

   The helper uses RETURN_ADDR for its fast enter/return. TMP2 receives the
   accumulated result, and the final OP_FLAGS also sets the flags from it.
   TMP1 is modified: 0x0a is subtracted from it and, in the wide/UTF path,
   bit 0 is set. */
check_vspace(compiler_common * common)5160 static void check_vspace(compiler_common *common)
5161 {
5162 /* Check whether TMP1 contains a vertical space character. TMP2 receives the result. */
5163 DEFINE_COMPILER;
5164 
5165 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5166 
     /* After rebasing to 0x0a, one unsigned compare covers 0x0a..0x0d. */
5167 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
5168 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
5169 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5170 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
5171 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5172 #if PCRE2_CODE_UNIT_WIDTH == 8
5173 if (common->utf)
5174   {
5175 #endif
       /* NOTE(review): this OP_FLAGS carries SLJIT_SET_E while the matching
          line in check_anynewline does not; the flags are set again by the
          SUB two lines below, so it looks redundant - confirm. */
5176   OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
       /* Setting bit 0 maps 0x2028 onto 0x2029, so one compare tests both. */
5177   OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
5178   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
5179 #if PCRE2_CODE_UNIT_WIDTH == 8
5180   }
5181 #endif
5182 #endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
     /* Fold the last pending comparison into TMP2 and set the flags from it. */
5183 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5184 
5185 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5186 }
5187 
/* Scratch aliases for the string compare helpers below: CHAR1 and CHAR2 reuse
   the STR_END and STACK_TOP registers. Both helpers save the original
   register contents on entry and restore them before returning. The aliases
   are #undef'd after do_caselesscmp. */
5188 #define CHAR1 STR_END
5189 #define CHAR2 STACK_TOP
5190 
/* Emits the shared case-sensitive compare helper (fast enter/return through
   RETURN_ADDR).

   On entry to the generated code, as read from the instructions below: TMP1
   points at the first sequence, STR_PTR minus TMP2 is the start of the second
   sequence, and TMP2 is the length to compare in bytes (it is decreased by
   IN_UCHARS(1) per unit). NOTE(review): the caller appears to guarantee
   TMP2 != 0, since the loop decrements before testing - confirm.

   The loop stops at the first unequal unit or when TMP2 reaches zero. TMP2 is
   therefore zero only when every unit compared equal; on a mismatch the loop
   is left before the decrement. CHAR1 and CHAR2 are saved in TMP3 and LOCALS0
   and restored on exit; TMP1, TMP3 and STR_PTR are modified. */
do_casefulcmp(compiler_common * common)5191 static void do_casefulcmp(compiler_common *common)
5192 {
5193 DEFINE_COMPILER;
5194 struct sljit_jump *jump;
5195 struct sljit_label *label;
5196 
5197 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5198 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5199 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
5200 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
     /* Both pointers are moved one unit back because the loads in the loop
        address [ptr + IN_UCHARS(1)]. MOVU_UCHAR presumably also writes the
        accessed address back to the base register - TODO confirm. */
5201 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5202 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5203 
5204 label = LABEL();
5205 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5206 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5207 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
5208 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5209 JUMPTO(SLJIT_NOT_ZERO, label);
5210 
5211 JUMPHERE(jump);
     /* Undo the one-unit bias on STR_PTR and restore the borrowed registers. */
5212 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5213 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
5214 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5215 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5216 }
5217 
/* Scratch alias for do_caselesscmp: the STACK_LIMIT register is borrowed to
   hold the address of the lower-casing table (common->lcc). The helper saves
   the register's contents on entry and restores them before returning. */
5218 #define LCC_TABLE STACK_LIMIT
5219 
/* Emits the shared caseless compare helper for the table-driven case (fast
   enter/return through RETURN_ADDR).

   The register protocol matches do_casefulcmp: TMP1 points at the first
   sequence, STR_PTR minus TMP2 is the start of the second sequence, and TMP2
   is the length in bytes. Before the comparison each unit is replaced by its
   entry in the common->lcc table; when code units are wider than 8 bits,
   values above 255 skip the lookup and are compared unchanged.

   TMP2 is zero on exit only when every unit compared equal. LCC_TABLE, CHAR1
   and CHAR2 are saved in TMP3, LOCALS0 and LOCALS1 and restored on exit;
   TMP1, TMP3 and STR_PTR are modified. */
do_caselesscmp(compiler_common * common)5220 static void do_caselesscmp(compiler_common *common)
5221 {
5222 DEFINE_COMPILER;
5223 struct sljit_jump *jump;
5224 struct sljit_label *label;
5225 
5226 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5227 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5228 
5229 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
5230 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
5231 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
5232 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
     /* Both pointers are moved one unit back because the loads in the loop
        address [ptr + IN_UCHARS(1)]. MOVU_UCHAR presumably also writes the
        accessed address back to the base register - TODO confirm. */
5233 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5234 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5235 
5236 label = LABEL();
5237 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5238 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5239 #if PCRE2_CODE_UNIT_WIDTH != 8
     /* The table is only indexed with values up to 255. */
5240 jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
5241 #endif
5242 OP1(SLJIT_MOV_U8, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
5243 #if PCRE2_CODE_UNIT_WIDTH != 8
5244 JUMPHERE(jump);
5245 jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
5246 #endif
5247 OP1(SLJIT_MOV_U8, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
5248 #if PCRE2_CODE_UNIT_WIDTH != 8
5249 JUMPHERE(jump);
5250 #endif
5251 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
5252 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5253 JUMPTO(SLJIT_NOT_ZERO, label);
5254 
5255 JUMPHERE(jump);
     /* Undo the one-unit bias on STR_PTR and restore the borrowed registers. */
5256 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5257 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
5258 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5259 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5260 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5261 }
5262 
5263 #undef LCC_TABLE
5264 #undef CHAR1
5265 #undef CHAR2
5266 
5267 #if defined SUPPORT_UNICODE
5268 
do_utf_caselesscmp(PCRE2_SPTR src1,jit_arguments * args,PCRE2_SPTR end1)5269 static PCRE2_SPTR SLJIT_CALL do_utf_caselesscmp(PCRE2_SPTR src1, jit_arguments *args, PCRE2_SPTR end1)
5270 {
5271 /* This function would be ineffective to do in JIT level. */
5272 sljit_u32 c1, c2;
5273 PCRE2_SPTR src2 = args->startchar_ptr;
5274 PCRE2_SPTR end2 = args->end;
5275 const ucd_record *ur;
5276 const sljit_u32 *pp;
5277 
5278 while (src1 < end1)
5279   {
5280   if (src2 >= end2)
5281     return (PCRE2_SPTR)1;
5282   GETCHARINC(c1, src1);
5283   GETCHARINC(c2, src2);
5284   ur = GET_UCD(c2);
5285   if (c1 != c2 && c1 != c2 + ur->other_case)
5286     {
5287     pp = PRIV(ucd_caseless_sets) + ur->caseset;
5288     for (;;)
5289       {
5290       if (c1 < *pp) return NULL;
5291       if (c1 == *pp++) break;
5292       }
5293     }
5294   }
5295 return src2;
5296 }
5297 
5298 #endif /* SUPPORT_UNICODE */
5299 
/* Emits the comparison of one pattern character against the subject and
   returns cc advanced past that character.

   Arguments:
     common      compiler state
     caseless    TRUE for a caseless compare
     cc          the pattern character: one code unit, or 1 + GET_EXTRALEN(*cc)
                 units when common->utf is set and HAS_EXTRALEN(*cc) holds
     context     state shared by consecutive calls: `length` is the remaining
                 distance (in bytes) from the unit being compared to STR_PTR,
                 `sourcereg` is the register the next load goes into (-1 on
                 the first call), and ucharptr/c/oc collect the units of one
                 combined compare
     backtracks  list receiving the jumps taken on mismatch

   The caseless case is handled by OR-ing a single bit into the loaded unit
   and comparing against the pattern unit with the same bit set. The bit and
   the code unit it applies to come from char_get_othercase_bit.

   Loads alternate between TMP1 and TMP2: each step first loads the following
   data into context->sourcereg, then switches sourcereg and compares the
   register that was loaded in the previous step. When unaligned reads are
   available (8 and 16 bit units only) up to four bytes are gathered in
   context->c / context->oc and compared with a single instruction. */
byte_sequence_compare(compiler_common * common,BOOL caseless,PCRE2_SPTR cc,compare_context * context,jump_list ** backtracks)5300 static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
5301     compare_context *context, jump_list **backtracks)
5302 {
5303 DEFINE_COMPILER;
5304 unsigned int othercasebit = 0;
5305 PCRE2_SPTR othercasechar = NULL;
5306 #ifdef SUPPORT_UNICODE
5307 int utflength;
5308 #endif
5309 
5310 if (caseless && char_has_othercase(common, cc))
5311   {
5312   othercasebit = char_get_othercase_bit(common, cc);
5313   SLJIT_ASSERT(othercasebit);
5314   /* Extracting bit difference info. */
5315 #if PCRE2_CODE_UNIT_WIDTH == 8
       /* High part: index of the code unit that differs; low byte: the bit. */
5316   othercasechar = cc + (othercasebit >> 8);
5317   othercasebit &= 0xff;
5318 #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5319   /* Note that this code only handles characters in the BMP. If there
5320   ever are characters outside the BMP whose othercase differs in only one
5321   bit from itself (there currently are none), this code will need to be
5322   revised for PCRE2_CODE_UNIT_WIDTH == 32. */
5323   othercasechar = cc + (othercasebit >> 9);
5324   if ((othercasebit & 0x100) != 0)
5325     othercasebit = (othercasebit & 0xff) << 8;
5326   else
5327     othercasebit &= 0xff;
5328 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
5329   }
5330 
     /* First call for this context: emit the initial load into TMP1 and make
        TMP2 the target of the next load. */
5331 if (context->sourcereg == -1)
5332   {
5333 #if PCRE2_CODE_UNIT_WIDTH == 8
5334 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5335   if (context->length >= 4)
5336     OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5337   else if (context->length >= 2)
5338     OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5339   else
5340 #endif
5341     OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5342 #elif PCRE2_CODE_UNIT_WIDTH == 16
5343 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5344   if (context->length >= 4)
5345     OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5346   else
5347 #endif
5348     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5349 #elif PCRE2_CODE_UNIT_WIDTH == 32
5350   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5351 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
5352   context->sourcereg = TMP2;
5353   }
5354 
5355 #ifdef SUPPORT_UNICODE
5356 utflength = 1;
5357 if (common->utf && HAS_EXTRALEN(*cc))
5358   utflength += GET_EXTRALEN(*cc);
5359 
     /* One iteration per code unit of the character. Without Unicode support
        the do/while is compiled out and the body runs exactly once. */
5360 do
5361   {
5362 #endif
5363 
5364   context->length -= IN_UCHARS(1);
5365 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
5366 
5367   /* Unaligned read is supported. */
       /* Record the expected unit in c and the bit to OR in (if any) in oc. */
5368   if (othercasebit != 0 && othercasechar == cc)
5369     {
5370     context->c.asuchars[context->ucharptr] = *cc | othercasebit;
5371     context->oc.asuchars[context->ucharptr] = othercasebit;
5372     }
5373   else
5374     {
5375     context->c.asuchars[context->ucharptr] = *cc;
5376     context->oc.asuchars[context->ucharptr] = 0;
5377     }
5378   context->ucharptr++;
5379 
       /* Emit the combined compare once the gathered units fill the width of
          the earlier load, or when no more data follows. */
5380 #if PCRE2_CODE_UNIT_WIDTH == 8
5381   if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
5382 #else
5383   if (context->ucharptr >= 2 || context->length == 0)
5384 #endif
5385     {
         /* Load the following data into the other register first. */
5386     if (context->length >= 4)
5387       OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5388     else if (context->length >= 2)
5389       OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5390 #if PCRE2_CODE_UNIT_WIDTH == 8
5391     else if (context->length >= 1)
5392       OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5393 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
5394     context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
5395 
         /* The case labels are unit counts: 4 and 2 bytes divided by the size
            of a code unit. */
5396     switch(context->ucharptr)
5397       {
5398       case 4 / sizeof(PCRE2_UCHAR):
5399       if (context->oc.asint != 0)
5400         OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
5401       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
5402       break;
5403 
5404       case 2 / sizeof(PCRE2_UCHAR):
5405       if (context->oc.asushort != 0)
5406         OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
5407       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
5408       break;
5409 
5410 #if PCRE2_CODE_UNIT_WIDTH == 8
5411       case 1:
5412       if (context->oc.asbyte != 0)
5413         OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
5414       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
5415       break;
5416 #endif
5417 
5418       default:
5419       SLJIT_ASSERT_STOP();
5420       break;
5421       }
5422     context->ucharptr = 0;
5423     }
5424 
5425 #else
5426 
5427   /* Unaligned read is unsupported or in 32 bit mode. */
       /* One unit per step: load the next unit (if any), then compare the
          unit that was loaded by the previous step. */
5428   if (context->length >= 1)
5429     OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5430 
5431   context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
5432 
5433   if (othercasebit != 0 && othercasechar == cc)
5434     {
5435     OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
5436     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
5437     }
5438   else
5439     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
5440 
5441 #endif
5442 
5443   cc++;
5444 #ifdef SUPPORT_UNICODE
5445   utflength--;
5446   }
5447 while (utflength > 0);
5448 #endif
5449 
5450 return cc;
5451 }
5452 
5453 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
5454 
/* SET_TYPE_OFFSET(value): emits one ADD or SUB on `typereg` so that the
   amount subtracted from it changes from `typeoffset` to `value`, then
   records `value` in `typeoffset`. Nothing is emitted when the two are
   already equal.

   The macro uses the locals `typereg` and `typeoffset` of the function it is
   expanded in. It expands to TWO statements (an if and an assignment), so it
   must not be used as the unbraced body of an if/else or loop. `value` is
   evaluated several times. */
5455 #define SET_TYPE_OFFSET(value) \
5456   if ((value) != typeoffset) \
5457     { \
5458     if ((value) < typeoffset) \
5459       OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
5460     else \
5461       OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
5462     } \
5463   typeoffset = (value);
5464 
/* SET_CHAR_OFFSET(value): the same adjustment for the character held in
   TMP1, tracked by the local `charoffset`. The same usage restrictions apply:
   it expands to two statements and evaluates `value` several times. */
5465 #define SET_CHAR_OFFSET(value) \
5466   if ((value) != charoffset) \
5467     { \
5468     if ((value) < charoffset) \
5469       OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
5470     else \
5471       OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
5472     } \
5473   charoffset = (value);
5474 
/* Forward declaration: compile_xclass_matchingpath calls this function, which
   is defined after it. */
5475 static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
5476 
compile_xclass_matchingpath(compiler_common * common,PCRE2_SPTR cc,jump_list ** backtracks)5477 static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
5478 {
5479 DEFINE_COMPILER;
5480 jump_list *found = NULL;
5481 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
5482 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
5483 struct sljit_jump *jump = NULL;
5484 PCRE2_SPTR ccbegin;
5485 int compares, invertcmp, numberofcmps;
5486 #if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
5487 BOOL utf = common->utf;
5488 #endif
5489 
5490 #ifdef SUPPORT_UNICODE
5491 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
5492 BOOL charsaved = FALSE;
5493 int typereg = TMP1;
5494 const sljit_u32 *other_cases;
5495 sljit_uw typeoffset;
5496 #endif
5497 
5498 /* Scanning the necessary info. */
5499 cc++;
5500 ccbegin = cc;
5501 compares = 0;
5502 
5503 if (cc[-1] & XCL_MAP)
5504   {
5505   min = 0;
5506   cc += 32 / sizeof(PCRE2_UCHAR);
5507   }
5508 
5509 while (*cc != XCL_END)
5510   {
5511   compares++;
5512   if (*cc == XCL_SINGLE)
5513     {
5514     cc ++;
5515     GETCHARINCTEST(c, cc);
5516     if (c > max) max = c;
5517     if (c < min) min = c;
5518 #ifdef SUPPORT_UNICODE
5519     needschar = TRUE;
5520 #endif
5521     }
5522   else if (*cc == XCL_RANGE)
5523     {
5524     cc ++;
5525     GETCHARINCTEST(c, cc);
5526     if (c < min) min = c;
5527     GETCHARINCTEST(c, cc);
5528     if (c > max) max = c;
5529 #ifdef SUPPORT_UNICODE
5530     needschar = TRUE;
5531 #endif
5532     }
5533 #ifdef SUPPORT_UNICODE
5534   else
5535     {
5536     SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5537     cc++;
5538     if (*cc == PT_CLIST)
5539       {
5540       other_cases = PRIV(ucd_caseless_sets) + cc[1];
5541       while (*other_cases != NOTACHAR)
5542         {
5543         if (*other_cases > max) max = *other_cases;
5544         if (*other_cases < min) min = *other_cases;
5545         other_cases++;
5546         }
5547       }
5548     else
5549       {
5550       max = READ_CHAR_MAX;
5551       min = 0;
5552       }
5553 
5554     switch(*cc)
5555       {
5556       case PT_ANY:
5557       /* Any either accepts everything or ignored. */
5558       if (cc[-1] == XCL_PROP)
5559         {
5560         compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
5561         if (list == backtracks)
5562           add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5563         return;
5564         }
5565       break;
5566 
5567       case PT_LAMP:
5568       case PT_GC:
5569       case PT_PC:
5570       case PT_ALNUM:
5571       needstype = TRUE;
5572       break;
5573 
5574       case PT_SC:
5575       needsscript = TRUE;
5576       break;
5577 
5578       case PT_SPACE:
5579       case PT_PXSPACE:
5580       case PT_WORD:
5581       case PT_PXGRAPH:
5582       case PT_PXPRINT:
5583       case PT_PXPUNCT:
5584       needstype = TRUE;
5585       needschar = TRUE;
5586       break;
5587 
5588       case PT_CLIST:
5589       case PT_UCNC:
5590       needschar = TRUE;
5591       break;
5592 
5593       default:
5594       SLJIT_ASSERT_STOP();
5595       break;
5596       }
5597     cc += 2;
5598     }
5599 #endif
5600   }
5601 SLJIT_ASSERT(compares > 0);
5602 
5603 /* We are not necessary in utf mode even in 8 bit mode. */
5604 cc = ccbegin;
5605 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
5606 
5607 if ((cc[-1] & XCL_HASPROP) == 0)
5608   {
5609   if ((cc[-1] & XCL_MAP) != 0)
5610     {
5611     jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5612     if (!check_class_ranges(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
5613       {
5614       OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5615       OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5616       OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5617       OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5618       OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5619       add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
5620       }
5621 
5622     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5623     JUMPHERE(jump);
5624 
5625     cc += 32 / sizeof(PCRE2_UCHAR);
5626     }
5627   else
5628     {
5629     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
5630     add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
5631     }
5632   }
5633 else if ((cc[-1] & XCL_MAP) != 0)
5634   {
5635   OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5636 #ifdef SUPPORT_UNICODE
5637   charsaved = TRUE;
5638 #endif
5639   if (!check_class_ranges(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
5640     {
5641 #if PCRE2_CODE_UNIT_WIDTH == 8
5642     jump = NULL;
5643     if (common->utf)
5644 #endif
5645       jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5646 
5647     OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5648     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5649     OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5650     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5651     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5652     add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
5653 
5654 #if PCRE2_CODE_UNIT_WIDTH == 8
5655     if (common->utf)
5656 #endif
5657       JUMPHERE(jump);
5658     }
5659 
5660   OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5661   cc += 32 / sizeof(PCRE2_UCHAR);
5662   }
5663 
5664 #ifdef SUPPORT_UNICODE
5665 if (needstype || needsscript)
5666   {
5667   if (needschar && !charsaved)
5668     OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5669 
5670   OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5671   OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5672   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5673   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5674   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5675   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5676   OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5677 
5678   /* Before anything else, we deal with scripts. */
5679   if (needsscript)
5680     {
5681     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
5682     OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5683 
5684     ccbegin = cc;
5685 
5686     while (*cc != XCL_END)
5687       {
5688       if (*cc == XCL_SINGLE)
5689         {
5690         cc ++;
5691         GETCHARINCTEST(c, cc);
5692         }
5693       else if (*cc == XCL_RANGE)
5694         {
5695         cc ++;
5696         GETCHARINCTEST(c, cc);
5697         GETCHARINCTEST(c, cc);
5698         }
5699       else
5700         {
5701         SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5702         cc++;
5703         if (*cc == PT_SC)
5704           {
5705           compares--;
5706           invertcmp = (compares == 0 && list != backtracks);
5707           if (cc[-1] == XCL_NOTPROP)
5708             invertcmp ^= 0x1;
5709           jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
5710           add_jump(compiler, compares > 0 ? list : backtracks, jump);
5711           }
5712         cc += 2;
5713         }
5714       }
5715 
5716     cc = ccbegin;
5717     }
5718 
5719   if (needschar)
5720     {
5721     OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5722     }
5723 
5724   if (needstype)
5725     {
5726     if (!needschar)
5727       {
5728       OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5729       OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5730       }
5731     else
5732       {
5733       OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
5734       OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5735       typereg = RETURN_ADDR;
5736       }
5737     }
5738   }
5739 #endif
5740 
5741 /* Generating code. */
5742 charoffset = 0;
5743 numberofcmps = 0;
5744 #ifdef SUPPORT_UNICODE
5745 typeoffset = 0;
5746 #endif
5747 
5748 while (*cc != XCL_END)
5749   {
5750   compares--;
5751   invertcmp = (compares == 0 && list != backtracks);
5752   jump = NULL;
5753 
5754   if (*cc == XCL_SINGLE)
5755     {
5756     cc ++;
5757     GETCHARINCTEST(c, cc);
5758 
5759     if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5760       {
5761       OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5762       OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
5763       numberofcmps++;
5764       }
5765     else if (numberofcmps > 0)
5766       {
5767       OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5768       OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5769       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5770       numberofcmps = 0;
5771       }
5772     else
5773       {
5774       jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5775       numberofcmps = 0;
5776       }
5777     }
5778   else if (*cc == XCL_RANGE)
5779     {
5780     cc ++;
5781     GETCHARINCTEST(c, cc);
5782     SET_CHAR_OFFSET(c);
5783     GETCHARINCTEST(c, cc);
5784 
5785     if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5786       {
5787       OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5788       OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5789       numberofcmps++;
5790       }
5791     else if (numberofcmps > 0)
5792       {
5793       OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5794       OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5795       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5796       numberofcmps = 0;
5797       }
5798     else
5799       {
5800       jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5801       numberofcmps = 0;
5802       }
5803     }
5804 #ifdef SUPPORT_UNICODE
5805   else
5806     {
5807     SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5808     if (*cc == XCL_NOTPROP)
5809       invertcmp ^= 0x1;
5810     cc++;
5811     switch(*cc)
5812       {
5813       case PT_ANY:
5814       if (!invertcmp)
5815         jump = JUMP(SLJIT_JUMP);
5816       break;
5817 
5818       case PT_LAMP:
5819       OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5820       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5821       OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5822       OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5823       OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5824       OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5825       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5826       break;
5827 
5828       case PT_GC:
5829       c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5830       SET_TYPE_OFFSET(c);
5831       jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5832       break;
5833 
5834       case PT_PC:
5835       jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5836       break;
5837 
5838       case PT_SC:
5839       compares++;
5840       /* Do nothing. */
5841       break;
5842 
5843       case PT_SPACE:
5844       case PT_PXSPACE:
5845       SET_CHAR_OFFSET(9);
5846       OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5847       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5848 
5849       OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5850       OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5851 
5852       OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5853       OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5854 
5855       SET_TYPE_OFFSET(ucp_Zl);
5856       OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5857       OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5858       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5859       break;
5860 
5861       case PT_WORD:
5862       OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5863       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5864       /* Fall through. */
5865 
5866       case PT_ALNUM:
5867       SET_TYPE_OFFSET(ucp_Ll);
5868       OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5869       OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5870       SET_TYPE_OFFSET(ucp_Nd);
5871       OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5872       OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5873       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5874       break;
5875 
5876       case PT_CLIST:
5877       other_cases = PRIV(ucd_caseless_sets) + cc[1];
5878 
5879       /* At least three characters are required.
5880          Otherwise this case would be handled by the normal code path. */
5881       SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5882       SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5883 
5884       /* Optimizing character pairs, if their difference is power of 2. */
5885       if (is_powerof2(other_cases[1] ^ other_cases[0]))
5886         {
5887         if (charoffset == 0)
5888           OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5889         else
5890           {
5891           OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5892           OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5893           }
5894         OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5895         OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5896         other_cases += 2;
5897         }
5898       else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5899         {
5900         if (charoffset == 0)
5901           OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5902         else
5903           {
5904           OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5905           OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5906           }
5907         OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5908         OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5909 
5910         OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5911         OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5912 
5913         other_cases += 3;
5914         }
5915       else
5916         {
5917         OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5918         OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5919         }
5920 
5921       while (*other_cases != NOTACHAR)
5922         {
5923         OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5924         OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5925         }
5926       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5927       break;
5928 
5929       case PT_UCNC:
5930       OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5931       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5932       OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5933       OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5934       OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5935       OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5936 
5937       SET_CHAR_OFFSET(0xa0);
5938       OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5939       OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5940       SET_CHAR_OFFSET(0);
5941       OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5942       OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
5943       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5944       break;
5945 
5946       case PT_PXGRAPH:
5947       /* C and Z groups are the farthest two groups. */
5948       SET_TYPE_OFFSET(ucp_Ll);
5949       OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5950       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5951 
5952       jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5953 
5954       /* In case of ucp_Cf, we overwrite the result. */
5955       SET_CHAR_OFFSET(0x2066);
5956       OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5957       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5958 
5959       OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5960       OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5961 
5962       OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5963       OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5964 
5965       JUMPHERE(jump);
5966       jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5967       break;
5968 
5969       case PT_PXPRINT:
5970       /* C and Z groups are the farthest two groups. */
5971       SET_TYPE_OFFSET(ucp_Ll);
5972       OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5973       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5974 
5975       OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5976       OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5977 
5978       jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5979 
5980       /* In case of ucp_Cf, we overwrite the result. */
5981       SET_CHAR_OFFSET(0x2066);
5982       OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5983       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5984 
5985       OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5986       OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5987 
5988       JUMPHERE(jump);
5989       jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5990       break;
5991 
5992       case PT_PXPUNCT:
5993       SET_TYPE_OFFSET(ucp_Sc);
5994       OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5995       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5996 
5997       SET_CHAR_OFFSET(0);
5998       OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
5999       OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
6000 
6001       SET_TYPE_OFFSET(ucp_Pc);
6002       OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
6003       OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
6004       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6005       break;
6006 
6007       default:
6008       SLJIT_ASSERT_STOP();
6009       break;
6010       }
6011     cc += 2;
6012     }
6013 #endif
6014 
6015   if (jump != NULL)
6016     add_jump(compiler, compares > 0 ? list : backtracks, jump);
6017   }
6018 
6019 if (found != NULL)
6020   set_jumps(found, LABEL());
6021 }
6022 
6023 #undef SET_TYPE_OFFSET
6024 #undef SET_CHAR_OFFSET
6025 
6026 #endif
6027 
/* Generates code for the assertion opcode given in "type"; the opcodes handled
are the case labels of the switch below.

Arguments:
  common      compiler state; this function reads its newline settings
              (nltype, newline, nlmin, nlmax), mode, utf, endonly and
              alt_circumflex fields, and adds fast calls to its
              wordboundary and anynewline jump lists
  type        the opcode to generate code for
  cc          bytecode pointer; only dereferenced for OP_REVERSE, where the
              length operand is fetched with GET(cc, 0)
  backtracks  list that receives every jump emitted for a failing assertion

Returns:      cc unchanged, except for OP_REVERSE which returns
              cc + LINK_SIZE. An opcode without a case label reaches
              SLJIT_ASSERT_STOP().
*/
compile_simple_assertion_matchingpath(compiler_common * common,PCRE2_UCHAR type,PCRE2_SPTR cc,jump_list ** backtracks)6028 static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
6029 {
6030 DEFINE_COMPILER;
6031 int length;
     /* Forward jumps; each one used is given its target later by JUMPHERE(). */
6032 struct sljit_jump *jump[4];
6033 #ifdef SUPPORT_UNICODE
     /* Head of the step-back loop emitted for OP_REVERSE in UTF mode. */
6034 struct sljit_label *label;
6035 #endif /* SUPPORT_UNICODE */
6036 
6037 switch(type)
6038   {
6039   case OP_SOD:
       /* Fail unless STR_PTR equals the "begin" field of jit_arguments. */
6040   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6041   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6042   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
6043   return cc;
6044 
6045   case OP_SOM:
       /* Fail unless STR_PTR equals the "str" field of jit_arguments. */
6046   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6047   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6048   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
6049   return cc;
6050 
6051   case OP_NOT_WORD_BOUNDARY:
6052   case OP_WORD_BOUNDARY:
       /* Fast-call the shared routine collected in common->wordboundary, then
          branch on the zero flag: OP_WORD_BOUNDARY fails on zero,
          OP_NOT_WORD_BOUNDARY fails on non-zero. (Presumably the routine
          leaves non-zero when a boundary was found - confirm against its
          definition.) */
6053   add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
6054   add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6055   return cc;
6056 
6057   case OP_EODN:
6058   /* Requires rather complex checks. */
       /* jump[0]: STR_PTR is already at (or past) STR_END, so all newline
          tests below are skipped. */
6059   jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6060   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6061     {
         /* Fixed newline made of two code units. TMP2 = STR_PTR + 2 units,
            TMP1 = the code unit at STR_PTR. */
6062     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6063     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6064     if (common->mode == PCRE2_JIT_COMPLETE)
           /* Fail unless exactly two code units remain. */
6065       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
6066     else
6067       {
           /* Other (partial) modes. If exactly two units remain (jump[1]) go
              on to the comparison below. Otherwise TMP2 becomes non-zero, and
              the assertion fails, when more than two units remain or when the
              unit at STR_PTR differs from the first newline unit. The case
              left over is a single remaining unit equal to the first newline
              unit: check_partial(common, TRUE) is called and then the
              assertion fails unconditionally. */
6068       jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
6069       OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
6070       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
6071       OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
6072       OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
6073       add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
6074       check_partial(common, TRUE);
6075       add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6076       JUMPHERE(jump[1]);
6077       }
         /* Both remaining units must match: the first against the high byte
            of common->newline, the second against its low byte. */
6078     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6079     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6080     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6081     }
6082   else if (common->nltype == NLTYPE_FIXED)
6083     {
         /* Single-unit fixed newline: fail unless exactly one unit remains
            and it equals common->newline. */
6084     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6085     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6086     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
6087     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
6088     }
6089   else
6090     {
         /* Remaining newline types. The unit at STR_PTR is tested for CR
            first; jump[1] is taken when it is something else. */
6091     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6092     jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
         /* CR seen. Compare STR_PTR + 2 units with STR_END: greater means the
            CR is the only unit left (jump[2], success); less means more than
            two units are left (fail); equal falls through. */
6093     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6094     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
6095     jump[2] = JUMP(SLJIT_GREATER);
6096     add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
6097     /* Equal. */
         /* Exactly two units are left: succeed (jump[3]) only if the second
            one is NL, otherwise fail. */
6098     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6099     jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
6100     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6101 
         /* From here on the unit at STR_PTR (still in TMP1) is not CR. */
6102     JUMPHERE(jump[1]);
6103     if (common->nltype == NLTYPE_ANYCRLF)
6104       {
           /* Fail if more than one unit remains or if that unit is not NL. */
6105       OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6106       add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
6107       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
6108       }
6109     else
6110       {
           /* Save STR_PTR in LOCALS1, read one character, and fail unless
              STR_PTR then equals STR_END. The shared routine collected in
              common->anynewline is fast-called next and the assertion fails
              on a zero flag. STR_PTR is restored on the success path. */
6111       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
6112       read_char_range(common, common->nlmin, common->nlmax, TRUE);
6113       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
6114       add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
6115       add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
6116       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
6117       }
6118     JUMPHERE(jump[2]);
6119     JUMPHERE(jump[3]);
6120     }
       /* All successful paths meet here. */
6121   JUMPHERE(jump[0]);
6122   check_partial(common, FALSE);
6123   return cc;
6124 
6125   case OP_EOD:
       /* Fail if any subject remains before STR_END. */
6126   add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
6127   check_partial(common, FALSE);
6128   return cc;
6129 
6130   case OP_DOLL:
       /* Fail when PCRE2_NOTEOL is set in the "options" field of
          jit_arguments. */
6131   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6132   OP2(SLJIT_AND32 | SLJIT_SET_E, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
6133   add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
6134 
       /* Without common->endonly the OP_EODN code above is emitted through a
          recursive call (its return value is not needed: that case returns
          cc unchanged). With endonly the same code as OP_EOD is emitted. */
6135   if (!common->endonly)
6136     compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
6137   else
6138     {
6139     add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
6140     check_partial(common, FALSE);
6141     }
6142   return cc;
6143 
6144   case OP_DOLLM:
       /* jump[1]: subject remains, go and test for a newline. Otherwise, at
          the end of the subject: fail on PCRE2_NOTEOL, else call
          check_partial() and finish through jump[0]. */
6145   jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
6146   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6147   OP2(SLJIT_AND32 | SLJIT_SET_E, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
6148   add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
6149   check_partial(common, FALSE);
6150   jump[0] = JUMP(SLJIT_JUMP);
6151   JUMPHERE(jump[1]);
6152 
6153   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6154     {
         /* Two-unit fixed newline. TMP2 = STR_PTR + 2 units, TMP1 = the code
            unit at STR_PTR. */
6155     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6156     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6157     if (common->mode == PCRE2_JIT_COMPLETE)
           /* Fail if fewer than two code units remain. */
6158       add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
6159     else
6160       {
           /* Other (partial) modes. With two or more units left (jump[1]) go
              on to the comparison below. With one unit left, fail if it is
              not the first newline unit; otherwise call
              check_partial(common, TRUE) and then fail unconditionally. */
6161       jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
6162       /* STR_PTR = STR_END - IN_UCHARS(1) */
6163       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6164       check_partial(common, TRUE);
6165       add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6166       JUMPHERE(jump[1]);
6167       }
6168 
         /* The next two units must equal the high and low bytes of
            common->newline respectively. */
6169     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6170     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6171     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6172     }
6173   else
6174     {
         /* Other newline types: peek_char() followed by check_newlinechar(),
            which is handed the backtracks list. */
6175     peek_char(common, common->nlmax);
6176     check_newlinechar(common, common->nltype, backtracks, FALSE);
6177     }
6178   JUMPHERE(jump[0]);
6179   return cc;
6180 
6181   case OP_CIRC:
       /* Fail if STR_PTR is past jit_arguments.begin, or if PCRE2_NOTBOL is
          set in jit_arguments.options. */
6182   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6183   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
6184   add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
6185   OP2(SLJIT_AND32 | SLJIT_SET_E, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
6186   add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
6187   return cc;
6188 
6189   case OP_CIRCM:
       /* TMP1 = jit_arguments.begin. jump[1]: STR_PTR is past begin, go and
          look at what precedes it. Otherwise fail on PCRE2_NOTBOL, else
          finish through jump[0]. */
6190   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6191   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
6192   jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
6193   OP2(SLJIT_AND32 | SLJIT_SET_E, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
6194   add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
6195   jump[0] = JUMP(SLJIT_JUMP);
6196   JUMPHERE(jump[1]);
6197 
       /* Unless common->alt_circumflex is set, fail when STR_PTR has reached
          STR_END. */
6198   if (!common->alt_circumflex)
6199     add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6200 
6201   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6202     {
         /* Two-unit fixed newline: fail if fewer than two units lie between
            begin (still in TMP1) and STR_PTR, then compare the two units
            before STR_PTR with the high and low bytes of common->newline. */
6203     OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6204     add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
6205     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6206     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6207     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6208     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6209     }
6210   else
6211     {
         /* Other newline types: step back with skip_char_back(), read a
            character limited to the newline range, and test it through
            check_newlinechar(), which is handed the backtracks list. */
6212     skip_char_back(common);
6213     read_char_range(common, common->nlmin, common->nlmax, TRUE);
6214     check_newlinechar(common, common->nltype, backtracks, FALSE);
6215     }
6216   JUMPHERE(jump[0]);
6217   return cc;
6218 
6219   case OP_REVERSE:
       /* The operand at cc is the distance to move back; for zero no code is
          emitted at all. */
6220   length = GET(cc, 0);
6221   if (length == 0)
6222     return cc + LINK_SIZE;
6223   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6224 #ifdef SUPPORT_UNICODE
6225   if (common->utf)
6226     {
         /* UTF mode: TMP3 = jit_arguments.begin, TMP2 = down counter starting
            at "length". Each iteration fails if STR_PTR is at or before
            begin, otherwise calls skip_char_back() and decrements TMP2,
            looping until it reaches zero. */
6227     OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6228     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
6229     label = LABEL();
6230     add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
6231     skip_char_back(common);
6232     OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
6233     JUMPTO(SLJIT_NOT_ZERO, label);
6234     }
6235   else
6236 #endif
6237     {
         /* Otherwise: move STR_PTR back by IN_UCHARS(length) in one step and
            fail if it ends up before jit_arguments.begin. */
6238     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6239     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
6240     add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
6241     }
6242   check_start_used_ptr(common);
6243   return cc + LINK_SIZE;
6244   }
     /* Every case above returns, so this is only reached for an opcode that
        has no case label. */
6245 SLJIT_ASSERT_STOP();
6246 return cc;
6247 }
6248 
compile_char1_matchingpath(compiler_common * common,PCRE2_UCHAR type,PCRE2_SPTR cc,jump_list ** backtracks,BOOL check_str_ptr)6249 static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
6250 {
6251 DEFINE_COMPILER;
6252 int length;
6253 unsigned int c, oc, bit;
6254 compare_context context;
6255 struct sljit_jump *jump[3];
6256 jump_list *end_list;
6257 #ifdef SUPPORT_UNICODE
6258 struct sljit_label *label;
6259 PCRE2_UCHAR propdata[5];
6260 #endif /* SUPPORT_UNICODE */
6261 
6262 switch(type)
6263   {
6264   case OP_NOT_DIGIT:
6265   case OP_DIGIT:
6266   /* Digits are usually 0-9, so it is worth to optimize them. */
6267   if (check_str_ptr)
6268     detect_partial_match(common, backtracks);
6269 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
6270   if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
6271     read_char7_type(common, type == OP_NOT_DIGIT);
6272   else
6273 #endif
6274     read_char8_type(common, type == OP_NOT_DIGIT);
6275     /* Flip the starting bit in the negative case. */
6276   OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
6277   add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6278   return cc;
6279 
6280   case OP_NOT_WHITESPACE:
6281   case OP_WHITESPACE:
6282   if (check_str_ptr)
6283     detect_partial_match(common, backtracks);
6284 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
6285   if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
6286     read_char7_type(common, type == OP_NOT_WHITESPACE);
6287   else
6288 #endif
6289     read_char8_type(common, type == OP_NOT_WHITESPACE);
6290   OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
6291   add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6292   return cc;
6293 
6294   case OP_NOT_WORDCHAR:
6295   case OP_WORDCHAR:
6296   if (check_str_ptr)
6297     detect_partial_match(common, backtracks);
6298 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
6299   if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
6300     read_char7_type(common, type == OP_NOT_WORDCHAR);
6301   else
6302 #endif
6303     read_char8_type(common, type == OP_NOT_WORDCHAR);
6304   OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
6305   add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6306   return cc;
6307 
6308   case OP_ANY:
6309   if (check_str_ptr)
6310     detect_partial_match(common, backtracks);
6311   read_char_range(common, common->nlmin, common->nlmax, TRUE);
6312   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6313     {
6314     jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
6315     end_list = NULL;
6316     if (common->mode != PCRE2_JIT_PARTIAL_HARD)
6317       add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6318     else
6319       check_str_end(common, &end_list);
6320 
6321     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6322     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
6323     set_jumps(end_list, LABEL());
6324     JUMPHERE(jump[0]);
6325     }
6326   else
6327     check_newlinechar(common, common->nltype, backtracks, TRUE);
6328   return cc;
6329 
6330   case OP_ALLANY:
6331   if (check_str_ptr)
6332     detect_partial_match(common, backtracks);
6333 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6334   if (common->utf)
6335     {
6336     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6337     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6338 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
6339 #if PCRE2_CODE_UNIT_WIDTH == 8
6340     jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
6341     OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
6342     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6343 #elif PCRE2_CODE_UNIT_WIDTH == 16
6344     jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
6345     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
6346     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
6347     OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
6348     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6349     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6350 #endif
6351     JUMPHERE(jump[0]);
6352 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
6353     return cc;
6354     }
6355 #endif
6356   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6357   return cc;
6358 
6359   case OP_ANYBYTE:
6360   if (check_str_ptr)
6361     detect_partial_match(common, backtracks);
6362   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6363   return cc;
6364 
6365 #ifdef SUPPORT_UNICODE
6366   case OP_NOTPROP:
6367   case OP_PROP:
6368   propdata[0] = XCL_HASPROP;
6369   propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
6370   propdata[2] = cc[0];
6371   propdata[3] = cc[1];
6372   propdata[4] = XCL_END;
6373   if (check_str_ptr)
6374     detect_partial_match(common, backtracks);
6375   compile_xclass_matchingpath(common, propdata, backtracks);
6376   return cc + 2;
6377 #endif
6378 
6379   case OP_ANYNL:
6380   if (check_str_ptr)
6381     detect_partial_match(common, backtracks);
6382   read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
6383   jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6384   /* We don't need to handle soft partial matching case. */
6385   end_list = NULL;
6386   if (common->mode != PCRE2_JIT_PARTIAL_HARD)
6387     add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6388   else
6389     check_str_end(common, &end_list);
6390   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6391   jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
6392   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6393   jump[2] = JUMP(SLJIT_JUMP);
6394   JUMPHERE(jump[0]);
6395   check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
6396   set_jumps(end_list, LABEL());
6397   JUMPHERE(jump[1]);
6398   JUMPHERE(jump[2]);
6399   return cc;
6400 
6401   case OP_NOT_HSPACE:
6402   case OP_HSPACE:
6403   if (check_str_ptr)
6404     detect_partial_match(common, backtracks);
6405   read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
6406   add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
6407   add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6408   return cc;
6409 
6410   case OP_NOT_VSPACE:
6411   case OP_VSPACE:
6412   if (check_str_ptr)
6413     detect_partial_match(common, backtracks);
6414   read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
6415   add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
6416   add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6417   return cc;
6418 
6419 #ifdef SUPPORT_UNICODE
6420   case OP_EXTUNI:
6421   if (check_str_ptr)
6422     detect_partial_match(common, backtracks);
6423   read_char(common);
6424   add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
6425   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
6426   /* Optimize register allocation: use a real register. */
6427   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6428   OP1(SLJIT_MOV_U8, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6429 
6430   label = LABEL();
6431   jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6432   OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
6433   read_char(common);
6434   add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
6435   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
6436   OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6437 
6438   OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
6439   OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
6440   OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
6441   OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6442   OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6443   JUMPTO(SLJIT_NOT_ZERO, label);
6444 
6445   OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
6446   JUMPHERE(jump[0]);
6447   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6448 
6449   if (common->mode == PCRE2_JIT_PARTIAL_HARD)
6450     {
6451     jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
6452     /* Since we successfully read a char above, partial matching must occure. */
6453     check_partial(common, TRUE);
6454     JUMPHERE(jump[0]);
6455     }
6456   return cc;
6457 #endif
6458 
6459   case OP_CHAR:
6460   case OP_CHARI:
6461   length = 1;
6462 #ifdef SUPPORT_UNICODE
6463   if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
6464 #endif
6465   if (common->mode == PCRE2_JIT_COMPLETE && check_str_ptr
6466       && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
6467     {
6468     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
6469     add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6470 
6471     context.length = IN_UCHARS(length);
6472     context.sourcereg = -1;
6473 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6474     context.ucharptr = 0;
6475 #endif
6476     return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
6477     }
6478 
6479   if (check_str_ptr)
6480     detect_partial_match(common, backtracks);
6481 #ifdef SUPPORT_UNICODE
6482   if (common->utf)
6483     {
6484     GETCHAR(c, cc);
6485     }
6486   else
6487 #endif
6488     c = *cc;
6489 
6490   if (type == OP_CHAR || !char_has_othercase(common, cc))
6491     {
6492     read_char_range(common, c, c, FALSE);
6493     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6494     return cc + length;
6495     }
6496   oc = char_othercase(common, c);
6497   read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
6498   bit = c ^ oc;
6499   if (is_powerof2(bit))
6500     {
6501     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
6502     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
6503     return cc + length;
6504     }
6505   jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
6506   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
6507   JUMPHERE(jump[0]);
6508   return cc + length;
6509 
6510   case OP_NOT:
6511   case OP_NOTI:
6512   if (check_str_ptr)
6513     detect_partial_match(common, backtracks);
6514 
6515   length = 1;
6516 #ifdef SUPPORT_UNICODE
6517   if (common->utf)
6518     {
6519 #if PCRE2_CODE_UNIT_WIDTH == 8
6520     c = *cc;
6521     if (c < 128)
6522       {
6523       OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6524       if (type == OP_NOT || !char_has_othercase(common, cc))
6525         add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6526       else
6527         {
6528         /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
6529         OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
6530         add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
6531         }
6532       /* Skip the variable-length character. */
6533       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6534       jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
6535       OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
6536       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6537       JUMPHERE(jump[0]);
6538       return cc + 1;
6539       }
6540     else
6541 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
6542       {
6543       GETCHARLEN(c, cc, length);
6544       }
6545     }
6546   else
6547 #endif /* SUPPORT_UNICODE */
6548     c = *cc;
6549 
6550   if (type == OP_NOT || !char_has_othercase(common, cc))
6551     {
6552     read_char_range(common, c, c, TRUE);
6553     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6554     }
6555   else
6556     {
6557     oc = char_othercase(common, c);
6558     read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
6559     bit = c ^ oc;
6560     if (is_powerof2(bit))
6561       {
6562       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
6563       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
6564       }
6565     else
6566       {
6567       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6568       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
6569       }
6570     }
6571   return cc + length;
6572 
6573   case OP_CLASS:
6574   case OP_NCLASS:
6575   if (check_str_ptr)
6576     detect_partial_match(common, backtracks);
6577 
6578 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
6579   bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
6580   read_char_range(common, 0, bit, type == OP_NCLASS);
6581 #else
6582   read_char_range(common, 0, 255, type == OP_NCLASS);
6583 #endif
6584 
6585   if (check_class_ranges(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
6586     return cc + 32 / sizeof(PCRE2_UCHAR);
6587 
6588 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
6589   jump[0] = NULL;
6590   if (common->utf)
6591     {
6592     jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
6593     if (type == OP_CLASS)
6594       {
6595       add_jump(compiler, backtracks, jump[0]);
6596       jump[0] = NULL;
6597       }
6598     }
6599 #elif PCRE2_CODE_UNIT_WIDTH != 8
6600   jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6601   if (type == OP_CLASS)
6602     {
6603     add_jump(compiler, backtracks, jump[0]);
6604     jump[0] = NULL;
6605     }
6606 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
6607 
6608   OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6609   OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6610   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
6611   OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6612   OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6613   add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
6614 
6615 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
6616   if (jump[0] != NULL)
6617     JUMPHERE(jump[0]);
6618 #endif
6619   return cc + 32 / sizeof(PCRE2_UCHAR);
6620 
6621 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6622   case OP_XCLASS:
6623   if (check_str_ptr)
6624     detect_partial_match(common, backtracks);
6625   compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
6626   return cc + GET(cc, 0) - 1;
6627 #endif
6628   }
6629 SLJIT_ASSERT_STOP();
6630 return cc;
6631 }
6632 
compile_charn_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,jump_list ** backtracks)6633 static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
6634 {
6635 /* This function consumes at least one input character. */
6636 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
6637 DEFINE_COMPILER;
6638 PCRE2_SPTR ccbegin = cc;
6639 compare_context context;
6640 int size;
6641 
6642 context.length = 0;
6643 do
6644   {
6645   if (cc >= ccend)
6646     break;
6647 
6648   if (*cc == OP_CHAR)
6649     {
6650     size = 1;
6651 #ifdef SUPPORT_UNICODE
6652     if (common->utf && HAS_EXTRALEN(cc[1]))
6653       size += GET_EXTRALEN(cc[1]);
6654 #endif
6655     }
6656   else if (*cc == OP_CHARI)
6657     {
6658     size = 1;
6659 #ifdef SUPPORT_UNICODE
6660     if (common->utf)
6661       {
6662       if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6663         size = 0;
6664       else if (HAS_EXTRALEN(cc[1]))
6665         size += GET_EXTRALEN(cc[1]);
6666       }
6667     else
6668 #endif
6669     if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6670       size = 0;
6671     }
6672   else
6673     size = 0;
6674 
6675   cc += 1 + size;
6676   context.length += IN_UCHARS(size);
6677   }
6678 while (size > 0 && context.length <= 128);
6679 
6680 cc = ccbegin;
6681 if (context.length > 0)
6682   {
6683   /* We have a fixed-length byte sequence. */
6684   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
6685   add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6686 
6687   context.sourcereg = -1;
6688 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6689   context.ucharptr = 0;
6690 #endif
6691   do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
6692   return cc;
6693   }
6694 
6695 /* A non-fixed length character will be checked if length == 0. */
6696 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
6697 }
6698 
6699 /* Forward definitions. */
6700 static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
6701 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
6702 
/* Allocates a zero-filled backtrack record of (size) bytes from the compiler's
memory pool and makes it the new top of the parent's backtrack list.

The macro relies on three names being in scope at the expansion site:
"compiler" (the sljit compiler), "parent" (the enclosing backtrack_common) and
"backtrack" (receives the new record). If the compiler is in an error state
after the allocation, the enclosing function returns (error). */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, size); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
6715 
/* Variant of the backtrack-record allocation for functions returning void:
allocates a zero-filled record of (size) bytes, links it in front of
parent->top, and executes a plain "return" if the compiler is in an error
state after the allocation. Requires "compiler", "parent" and "backtrack" to
be in scope at the expansion site. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, size); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
6728 
/* Views the local "backtrack" pointer as a pointer to the given, more
specific backtrack record type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
6730 
static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
/* Emits code that selects a capture group for a duplicate-name reference.
The name table entries belonging to the name are tried in order; TMP2 is left
holding the address of the OVECTOR pair of the first group whose start slot
differs from OVECTOR(1), or of the last candidate when every slot equals it.
If backtracks is not NULL and unset back references are not allowed to match,
a jump to backtracks is emitted for the case where the last candidate's start
slot also equals OVECTOR(1). */
DEFINE_COMPILER;
jump_list *found = NULL;
PCRE2_SPTR slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
int remaining = GET2(cc, 1 + IMM2_SIZE);
unsigned int offset;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

/* TMP1 holds the value every candidate start slot is compared against. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* All candidates except the last one: leave as soon as one differs. */
for (remaining--; remaining > 0; remaining--)
  {
  offset = GET2(slot, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
  add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
  slot += common->name_entry_size;
  }

/* The last candidate is selected unconditionally. */
offset = GET2(slot, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
if (backtracks != NULL && !common->unset_backref)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));

set_jumps(found, LABEL());
}
6760 
/* Emits the matching path for a single occurrence of a back reference.

For OP_REF and OP_REFI the referenced range is read from the OVECTOR pair
selected by the group number that follows the opcode. For OP_DNREF and
OP_DNREFI the caller must already have loaded TMP2 with the address of the
selected OVECTOR pair (compile_dnref_search does this).

Arguments:
  common      compiler state
  cc          points to the OP_REF, OP_REFI, OP_DNREF or OP_DNREFI opcode
  backtracks  list receiving every jump taken when the reference fails
  withchecks  when TRUE, emit the check for an empty reference and, for
                numbered references, the check for an unset group (the latter
                is skipped when unset back references are allowed to match)
  emptyfail   only used with withchecks: when TRUE an empty reference jumps
                to backtracks, otherwise it skips the comparison and succeeds

The comparison is caseless for OP_REFI and OP_DNREFI and caseful for OP_REF
and OP_DNREF. The duplicate-name opcodes have to be tested explicitly:
deciding on "*cc == OP_REFI" / "*cc == OP_REF" alone would send OP_DNREF to
the caseless helper and keep OP_DNREFI away from the UTF-aware comparison. */
static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
BOOL caseless = (*cc == OP_REFI || *cc == OP_DNREFI);
int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;

/* TMP1 = start of the referenced text. */
if (ref)
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->unset_backref)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
else
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UNICODE
if (common->utf && caseless)
  {
  /* Caseless comparison in UTF mode is delegated to the C helper
  do_utf_caselesscmp, whose arguments are passed in R0, R1 and R2. */
  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
  /* TMP2 = end of the referenced text. */
  if (ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  if (withchecks)
    jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);

  /* Needed to save important temporary registers. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
  OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
  /* The current subject position is handed over through startchar_ptr. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), STR_PTR, 0);
  sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  /* Return values 0 and 1 are not subject positions: both fail in complete
  mode, while in partial modes 1 additionally runs the partial match check. */
  if (common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
    nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  /* Any other return value is the new subject position. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* TMP2 = length of the referenced text (end - start). */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  if (withchecks)
    jump = JUMP(SLJIT_ZERO);

  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, partial);

  add_jump(compiler, caseless ? &common->caselesscmp : &common->casefulcmp, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* The reference runs past the end of the subject: compare only the part
    that is available, then report a partial match. */
    nopartial = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* TMP2 -= STR_END - STR_PTR */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, caseless ? &common->caselesscmp : &common->casefulcmp, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }

/* Resolve the empty-reference jump recorded above, if any. */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
}
6856 
/* Emits the matching path for a back reference (OP_REF, OP_REFI, OP_DNREF or
OP_DNREFI) that is immediately followed by a repeat opcode (OP_CRSTAR,
OP_CRPLUS, OP_CRQUERY, OP_CRRANGE or one of their minimizing variants).

Arguments:
  common   compiler state
  cc       points to the back reference opcode
  parent   backtrack record that receives the new ref_iterator_backtrack

Returns a pointer just past the repeat opcode and its operands, or NULL when
the compiler is in an error state after allocating the backtrack record. */
static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
backtrack_common *backtrack;
PCRE2_UCHAR type;
int offset = 0;
struct sljit_label *label;
struct sljit_jump *zerolength;
struct sljit_jump *jump = NULL;
PCRE2_SPTR ccbegin = cc;
int min = 0, max = 0;
BOOL minimize;

PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);

/* A duplicate-name reference carries one more IMM2 operand than a numbered
one; skipping it here lets the repeat opcode be read at the same relative
position in both cases. */
if (ref)
  offset = GET2(cc, 1) << 1;
else
  cc += IMM2_SIZE;
type = cc[1 + IMM2_SIZE];

/* OP_CRSTAR is asserted to be even; an odd repeat opcode is treated as a
minimizing repeat. */
SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
minimize = (type & 0x1) != 0;
/* Decode the repeat limits. max == 0 is used for the repeats that have no
upper limit (star and plus). */
switch(type)
  {
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  min = 0;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  min = 1;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  min = 0;
  max = 1;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  min = GET2(cc, 1 + IMM2_SIZE + 1);
  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
  break;
  default:
  SLJIT_ASSERT_STOP();
  break;
  }

/* Maximizing repeat: match as many occurrences as possible, saving STR_PTR on
the backtrack stack after each optional one. */
if (!minimize)
  {
  if (min == 0)
    {
    allocate_stack(common, 2);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
    /* Temporary release of STR_PTR. */
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    /* Handles both invalid and empty cases. Since the minimum repeat
    is zero, the invalid case is basically the same as an empty case. */
    if (ref)
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    else
      {
      compile_dnref_search(common, ccbegin, NULL);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      /* Keep the selected OVECTOR pair address; it is reloaded into TMP2 at
      the start of every iteration. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    /* Restore if not zero length. */
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    }
  else
    {
    allocate_stack(common, 1);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    if (ref)
      {
      /* At least one occurrence is required: fail when the group start
      equals OVECTOR(1). */
      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
      }
    else
      {
      compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    }

  /* POSSESSIVE0 is used as the iteration counter when one is needed. */
  if (min > 1 || max > 1)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);

  /* Start of the loop that matches one occurrence. */
  label = LABEL();
  if (!ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);

  if (min > 1 || max > 1)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
    /* Below the minimum: loop again without saving a backtrack position. */
    if (min > 1)
      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
    if (max > 1)
      {
      /* Below the maximum: save the position and try another occurrence. */
      jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, label);
      JUMPHERE(jump);
      }
    }

  if (max == 0)
    {
    /* Includes min > 1 case as well. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, label);
    }

  JUMPHERE(zerolength);
  BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();

  count_match(common);
  return cc;
  }

/* Minimizing repeat. STACK(0) holds a subject position, STACK(1) the
iteration counter (not initialized for OP_CRMINSTAR) and, for duplicate-name
references, STACK(2) the address of the selected OVECTOR pair. */
allocate_stack(common, ref ? 2 : 3);
if (ref)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
if (type != OP_CRMINSTAR)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

if (min == 0)
  {
  /* Handles both invalid and empty cases. Since the minimum repeat
  is zero, the invalid case is basically the same as an empty case. */
  if (ref)
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    {
    compile_dnref_search(common, ccbegin, NULL);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  /* Length is non-zero, we can match real repeats. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  /* Zero occurrences are acceptable, so the first pass skips the matching
  code below. */
  jump = JUMP(SLJIT_JUMP);
  }
else
  {
  if (ref)
    {
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    }
  else
    {
    compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  }

/* Label stored in the backtrack record: the code that matches one more
occurrence starts here. */
BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
if (max > 0)
  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));

if (!ref)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

if (min > 1)
  {
  /* Keep matching until the minimum count has been reached. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
  }
else if (max > 0)
  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);

if (jump != NULL)
  JUMPHERE(jump);
JUMPHERE(zerolength);

count_match(common);
return cc;
}
7063 
compile_recurse_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)7064 static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
7065 {
7066 DEFINE_COMPILER;
7067 backtrack_common *backtrack;
7068 recurse_entry *entry = common->entries;
7069 recurse_entry *prev = NULL;
7070 sljit_sw start = GET(cc, 1);
7071 PCRE2_SPTR start_cc;
7072 BOOL needs_control_head;
7073 
7074 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
7075 
7076 /* Inlining simple patterns. */
7077 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
7078   {
7079   start_cc = common->start + start;
7080   compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
7081   BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
7082   return cc + 1 + LINK_SIZE;
7083   }
7084 
7085 while (entry != NULL)
7086   {
7087   if (entry->start == start)
7088     break;
7089   prev = entry;
7090   entry = entry->next;
7091   }
7092 
7093 if (entry == NULL)
7094   {
7095   entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
7096   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7097     return NULL;
7098   entry->next = NULL;
7099   entry->entry = NULL;
7100   entry->calls = NULL;
7101   entry->start = start;
7102 
7103   if (prev != NULL)
7104     prev->next = entry;
7105   else
7106     common->entries = entry;
7107   }
7108 
7109 if (common->has_set_som && common->mark_ptr != 0)
7110   {
7111   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
7112   allocate_stack(common, 2);
7113   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
7114   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7115   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
7116   }
7117 else if (common->has_set_som || common->mark_ptr != 0)
7118   {
7119   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
7120   allocate_stack(common, 1);
7121   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7122   }
7123 
7124 if (entry->entry == NULL)
7125   add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
7126 else
7127   JUMPTO(SLJIT_FAST_CALL, entry->entry);
7128 /* Leave if the match is failed. */
7129 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
7130 return cc + 1 + LINK_SIZE;
7131 }
7132 
do_callout(struct jit_arguments * arguments,pcre2_callout_block * callout_block,PCRE2_SPTR * jit_ovector)7133 static int SLJIT_CALL do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
7134 {
7135 PCRE2_SPTR begin = arguments->begin;
7136 PCRE2_SIZE *ovector = arguments->match_data->ovector;
7137 sljit_u32 oveccount = arguments->oveccount;
7138 sljit_u32 i;
7139 
7140 if (arguments->callout == NULL)
7141   return 0;
7142 
7143 callout_block->version = 1;
7144 
7145 /* Offsets in subject. */
7146 callout_block->subject_length = arguments->end - arguments->begin;
7147 callout_block->start_match = (PCRE2_SPTR)callout_block->subject - arguments->begin;
7148 callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - arguments->begin;
7149 callout_block->subject = begin;
7150 
7151 /* Convert and copy the JIT offset vector to the ovector array. */
7152 callout_block->capture_top = 0;
7153 callout_block->offset_vector = ovector;
7154 for (i = 2; i < oveccount; i += 2)
7155   {
7156   ovector[i] = jit_ovector[i] - begin;
7157   ovector[i + 1] = jit_ovector[i + 1] - begin;
7158   if (jit_ovector[i] >= begin)
7159     callout_block->capture_top = i;
7160   }
7161 
7162 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
7163 ovector[0] = PCRE2_UNSET;
7164 ovector[1] = PCRE2_UNSET;
7165 return (arguments->callout)(callout_block, arguments->callout_data);
7166 }
7167 
/* Size of a pcre2_callout_block rounded up to a multiple of 8 bytes. */
#define CALLOUT_ARG_SIZE \
    (((int)sizeof(pcre2_callout_block) + 7) & ~7)

/* Offset of a pcre2_callout_block field measured from the end of the
CALLOUT_ARG_SIZE area, i.e. a negative displacement from an address that
points just past the block. */
#define CALLOUT_ARG_OFFSET(arg) \
    (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(pcre2_callout_block, arg))
7174 
/* Emits the matching path for a callout item (OP_CALLOUT or a callout with a
string argument).

The generated code reserves CALLOUT_ARG_SIZE bytes on the backtrack stack,
fills them in as a pcre2_callout_block, calls do_callout and releases the
area again. A positive result of do_callout jumps to the backtrack list, a
negative one leaves through the forced quit path, and zero continues.

Arguments:
  common   compiler state
  cc       points to the callout opcode
  parent   backtrack record that receives the new backtrack_common

Returns a pointer past the callout item, or NULL when the compiler is in an
error state after allocating the backtrack record. */
static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
sljit_s32 mov_opcode;
/* OP_CALLOUT has a fixed length; the other form stores its own length. */
unsigned int callout_length = (*cc == OP_CALLOUT)
    ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
sljit_sw value1;
sljit_sw value2;
sljit_sw value3;

PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));

SLJIT_ASSERT(common->capture_last_ptr != 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
/* The callout number is taken from the item for OP_CALLOUT and is zero for
the string form. */
value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);

/* These pointer sized fields temporarily store internal variables:
offset_vector holds STR_PTR and subject holds OVECTOR(0). do_callout converts
them into current_position and start_match before setting the real values. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);

if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
/* PCRE2_SIZE fields are written with a move matching their width. */
mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));

/* String argument: pointer, length and offset; all zero for OP_CALLOUT. */
if (*cc == OP_CALLOUT)
  {
  value1 = 0;
  value2 = 0;
  value3 = 0;
  }
else
  {
  value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
  value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
  value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
  }

OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
/* The mark field gets the value loaded into TMP2 above, or zero when the
pattern has no mark pointer. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);

/* Needed to save important temporary registers. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
/* Second argument: address of the callout block; third: the JIT ovector. */
OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
/* do_callout returns an int: widen it before the signed comparison. */
OP1(SLJIT_MOV_S32, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));

/* Check return value. */
OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
if (common->forced_quit_label == NULL)
  add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS));
else
  JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label);
return cc + callout_length;
}
7244 
7245 #undef CALLOUT_ARG_SIZE
7246 #undef CALLOUT_ARG_OFFSET
7247 
assert_needs_str_ptr_saving(PCRE2_SPTR cc)7248 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
7249 {
7250 while (TRUE)
7251   {
7252   switch (*cc)
7253     {
7254     case OP_CALLOUT_STR:
7255     cc += GET(cc, 1 + 2*LINK_SIZE);
7256     break;
7257 
7258     case OP_NOT_WORD_BOUNDARY:
7259     case OP_WORD_BOUNDARY:
7260     case OP_CIRC:
7261     case OP_CIRCM:
7262     case OP_DOLL:
7263     case OP_DOLLM:
7264     case OP_CALLOUT:
7265     case OP_ALT:
7266     cc += PRIV(OP_lengths)[*cc];
7267     break;
7268 
7269     case OP_KET:
7270     return FALSE;
7271 
7272     default:
7273     return TRUE;
7274     }
7275   }
7276 }
7277 
/* Emits the matching path of an assertion whose opcode lies in the range
OP_ASSERT .. OP_ASSERTBACK_NOT. The assertion may be preceded by OP_BRAZERO
or OP_BRAMINZERO (never when conditional is TRUE).

Every alternative of the assertion is compiled in turn (matching path followed
by its backtracking path). While doing so several fields of common (local_exit,
positive_assert, then_trap, quit_label, accept_label, quit,
positive_assert_quit, accept) are overridden; they are restored from the
values saved on entry before the function returns, on both the normal and the
error paths.

Arguments:
  common       the compiler state
  cc           points at the assertion opcode, or at the OP_BRAZERO /
               OP_BRAMINZERO in front of it
  backtrack    backtrack descriptor of the assertion; framesize and
               private_data_ptr are stored into it, matchingpath is set for
               OP_BRAZERO and used as a jump target for OP_BRAMINZERO
  conditional  when TRUE, failure jumps are collected in backtrack->condfailed
               instead of backtrack->common.topbacktracks

Returns: pointer to the code following the closing bracket of the assertion,
         or NULL if the sljit compiler reported an error
*/

static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
{
DEFINE_COMPILER;
int framesize;
int extrasize;
BOOL needs_control_head;
int private_data_ptr;
backtrack_common altbacktrack;
PCRE2_SPTR ccbegin;
PCRE2_UCHAR opcode;
PCRE2_UCHAR bra = OP_BRA;
jump_list *tmp = NULL;
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
jump_list **found;
/* Saving previous accept variables. */
BOOL save_local_exit = common->local_exit;
BOOL save_positive_assert = common->positive_assert;
then_trap_backtrack *save_then_trap = common->then_trap;
struct sljit_label *save_quit_label = common->quit_label;
struct sljit_label *save_accept_label = common->accept_label;
jump_list *save_quit = common->quit;
jump_list *save_positive_assert_quit = common->positive_assert_quit;
jump_list *save_accept = common->accept;
struct sljit_jump *jump;
struct sljit_jump *brajump = NULL;

/* Assert captures then. */
common->then_trap = NULL;

if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  SLJIT_ASSERT(!conditional);
  bra = *cc;
  cc++;
  }
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
backtrack->framesize = framesize;
backtrack->private_data_ptr = private_data_ptr;
opcode = *cc;
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* For positive assertions a successful alternative jumps to the local list
tmp (resolved at "Assert is successful" below); for negative assertions it
goes straight to the failure target. */
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
ccbegin = cc;
cc += GET(cc, 1);

if (bra == OP_BRAMINZERO)
  {
  /* This is a braminzero backtrack path. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (framesize < 0)
  {
  /* No frame has to be built: at most STR_PTR and the control head are
  pushed. The STR_PTR slot is omitted when the body does not need it. */
  extrasize = 1;
  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
    extrasize = 0;

  if (needs_control_head)
    extrasize++;

  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  if (extrasize > 0)
    allocate_stack(common, extrasize);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);

  if (extrasize > 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    SLJIT_ASSERT(extrasize == 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
    }
  }
else
  {
  /* A frame is required: push STR_PTR, the optional control head and the
  previous private_data_ptr value, then let init_frame fill in the rest. */
  extrasize = needs_control_head ? 3 : 2;
  allocate_stack(common, framesize + extrasize);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    }
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);

  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
  }

memset(&altbacktrack, 0, sizeof(backtrack_common));
if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  /* Negative assert is stronger than positive assert. */
  common->local_exit = TRUE;
  common->quit_label = NULL;
  common->quit = NULL;
  common->positive_assert = FALSE;
  }
else
  common->positive_assert = TRUE;
common->positive_assert_quit = NULL;

/* One iteration per alternative of the assertion. */
while (1)
  {
  common->accept_label = NULL;
  common->accept = NULL;
  altbacktrack.top = NULL;
  altbacktrack.topbacktracks = NULL;

  /* Second and later alternatives start from the saved STR_PTR. */
  if (*ccbegin == OP_ALT && extrasize > 0)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  altbacktrack.cc = ccbegin;
  compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the state saved on entry before reporting the failure. */
    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
      {
      common->local_exit = save_local_exit;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->positive_assert = save_positive_assert;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assert_quit = save_positive_assert_quit;
    common->accept = save_accept;
    return NULL;
    }
  common->accept_label = LABEL();
  if (common->accept != NULL)
    set_jumps(common->accept, common->accept_label);

  /* Reset stack. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    else if (extrasize > 0)
      free_stack(common, extrasize);

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
    }
  else
    {
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
      }
    else
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      }
    }

  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (conditional)
      {
      if (extrasize > 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
      }
    else if (bra == OP_BRAZERO)
      {
      if (framesize < 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
        }
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (framesize >= 0)
      {
      /* For OP_BRA and OP_BRAMINZERO. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
      }
    }
  /* The alternative matched: leave through the "found" list. */
  add_jump(compiler, found, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the state saved on entry before reporting the failure. */
    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
      {
      common->local_exit = save_local_exit;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->positive_assert = save_positive_assert;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assert_quit = save_positive_assert_quit;
    common->accept = save_accept;
    return NULL;
    }
  set_jumps(altbacktrack.topbacktracks, LABEL());

  if (*cc != OP_ALT)
    break;

  ccbegin = cc;
  cc += GET(cc, 1);
  }

if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  SLJIT_ASSERT(common->positive_assert_quit == NULL);
  /* Makes the check less complicated below. */
  common->positive_assert_quit = common->quit;
  }

/* None of them matched. */
if (common->positive_assert_quit != NULL)
  {
  jump = JUMP(SLJIT_JUMP);
  set_jumps(common->positive_assert_quit, LABEL());
  SLJIT_ASSERT(framesize != no_stack);
  if (framesize < 0)
    OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
    }
  JUMPHERE(jump);
  }

if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));

if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
  {
  /* Assert is failed. */
  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  if (framesize < 0)
    {
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  jump = JUMP(SLJIT_JUMP);
  if (bra != OP_BRAZERO)
    add_jump(compiler, target, jump);

  /* Assert is successful. */
  set_jumps(tmp, LABEL());
  if (framesize < 0)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));

    /* Keep the STR_PTR on the top of the stack. */
    if (bra == OP_BRAZERO)
      {
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      if (extrasize == 2)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else if (bra == OP_BRAMINZERO)
      {
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    }
  else
    {
    if (bra == OP_BRA)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
      }
    else
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
      if (extrasize == 2)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        if (bra == OP_BRAMINZERO)
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
        }
      else
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
        }
      }
    }

  if (bra == OP_BRAZERO)
    {
    backtrack->matchingpath = LABEL();
    SET_LABEL(jump, backtrack->matchingpath);
    }
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    if (framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
      }
    set_jumps(backtrack->common.topbacktracks, LABEL());
    }
  }
else
  {
  /* AssertNot is successful. */
  if (framesize < 0)
    {
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

    if (bra != OP_BRA)
      {
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra != OP_BRA)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }

  if (bra == OP_BRAZERO)
    backtrack->matchingpath = LABEL();
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    }

  if (bra != OP_BRA)
    {
    SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
    set_jumps(backtrack->common.topbacktracks, LABEL());
    backtrack->common.topbacktracks = NULL;
    }
  }

/* Restore the compiler state that was overridden for the assertion. */
if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  common->local_exit = save_local_exit;
  common->quit_label = save_quit_label;
  common->quit = save_quit;
  }
common->positive_assert = save_positive_assert;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
common->positive_assert_quit = save_positive_assert_quit;
common->accept = save_accept;
return cc + 1 + LINK_SIZE;
}
7699 
match_once_common(compiler_common * common,PCRE2_UCHAR ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)7700 static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
7701 {
7702 DEFINE_COMPILER;
7703 int stacksize;
7704 
7705 if (framesize < 0)
7706   {
7707   if (framesize == no_frame)
7708     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7709   else
7710     {
7711     stacksize = needs_control_head ? 1 : 0;
7712     if (ket != OP_KET || has_alternatives)
7713       stacksize++;
7714 
7715     if (stacksize > 0)
7716       free_stack(common, stacksize);
7717     }
7718 
7719   if (needs_control_head)
7720     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
7721 
7722   /* TMP2 which is set here used by OP_KETRMAX below. */
7723   if (ket == OP_KETRMAX)
7724     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
7725   else if (ket == OP_KETRMIN)
7726     {
7727     /* Move the STR_PTR to the private_data_ptr. */
7728     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
7729     }
7730   }
7731 else
7732   {
7733   stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
7734   OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
7735   if (needs_control_head)
7736     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
7737 
7738   if (ket == OP_KETRMAX)
7739     {
7740     /* TMP2 which is set here used by OP_KETRMAX below. */
7741     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7742     }
7743   }
7744 if (needs_control_head)
7745   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
7746 }
7747 
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)7748 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
7749 {
7750 DEFINE_COMPILER;
7751 
7752 if (common->capture_last_ptr != 0)
7753   {
7754   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
7755   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
7756   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7757   stacksize++;
7758   }
7759 if (common->optimized_cbracket[offset >> 1] == 0)
7760   {
7761   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
7762   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7763   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7764   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7765   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
7766   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
7767   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
7768   stacksize += 2;
7769   }
7770 return stacksize;
7771 }
7772 
/*
  Handling bracketed expressions is probably the most complex part.

  Stack layout naming characters:
    S - Push the current STR_PTR
    0 - Push a 0 (NULL)
    A - Push the current STR_PTR. Needed for restoring the STR_PTR
        before the next alternative. Not pushed if there are no alternatives.
    M - Any values pushed by the current alternative. Can be empty, or anything.
    C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
    L - Push the previous local (pointed by localptr) to the stack
   () - optional values stored on the stack
  ()* - optional, can be stored multiple times

  The following list shows the regular expression templates, their PCRE byte codes
  and stack layout supported by pcre-sljit.

  (?:)                     OP_BRA     | OP_KET                A M
  ()                       OP_CBRA    | OP_KET                C M
  (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
                           OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
  (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
                           OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
  ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
                           OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
  ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
                           OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
  (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
  (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
  ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
  ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
  (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
           OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
  (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
           OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
  ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
           OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
  ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
           OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*


  Stack layout naming characters:
    A - Push the alternative index (starting from 0) on the stack.
        Not pushed if there are no alternatives.
    M - Any values pushed by the current alternative. Can be empty, or anything.

  The next list shows the possible content of a bracket:
  (|)     OP_*BRA    | OP_ALT ...         M A
  (?()|)  OP_*COND   | OP_ALT             M A
  (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
  (?>|)   OP_ONCE_NC | OP_ALT ...         [stack trace] M A
                                          Or nothing, if trace is unnecessary
*/
7826 
compile_bracket_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)7827 static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
7828 {
7829 DEFINE_COMPILER;
7830 backtrack_common *backtrack;
7831 PCRE2_UCHAR opcode;
7832 int private_data_ptr = 0;
7833 int offset = 0;
7834 int i, stacksize;
7835 int repeat_ptr = 0, repeat_length = 0;
7836 int repeat_type = 0, repeat_count = 0;
7837 PCRE2_SPTR ccbegin;
7838 PCRE2_SPTR matchingpath;
7839 PCRE2_SPTR slot;
7840 PCRE2_UCHAR bra = OP_BRA;
7841 PCRE2_UCHAR ket;
7842 assert_backtrack *assert;
7843 BOOL has_alternatives;
7844 BOOL needs_control_head = FALSE;
7845 struct sljit_jump *jump;
7846 struct sljit_jump *skip;
7847 struct sljit_label *rmax_label = NULL;
7848 struct sljit_jump *braminzero = NULL;
7849 
7850 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
7851 
7852 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
7853   {
7854   bra = *cc;
7855   cc++;
7856   opcode = *cc;
7857   }
7858 
7859 opcode = *cc;
7860 ccbegin = cc;
7861 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
7862 ket = *matchingpath;
7863 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
7864   {
7865   repeat_ptr = PRIVATE_DATA(matchingpath);
7866   repeat_length = PRIVATE_DATA(matchingpath + 1);
7867   repeat_type = PRIVATE_DATA(matchingpath + 2);
7868   repeat_count = PRIVATE_DATA(matchingpath + 3);
7869   SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
7870   if (repeat_type == OP_UPTO)
7871     ket = OP_KETRMAX;
7872   if (repeat_type == OP_MINUPTO)
7873     ket = OP_KETRMIN;
7874   }
7875 
7876 matchingpath = ccbegin + 1 + LINK_SIZE;
7877 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
7878 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
7879 cc += GET(cc, 1);
7880 
7881 has_alternatives = *cc == OP_ALT;
7882 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
7883   {
7884   SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
7885     compile_time_checks_must_be_grouped_together);
7886   has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
7887   }
7888 
7889 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
7890   opcode = OP_SCOND;
7891 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
7892   opcode = OP_ONCE;
7893 
7894 if (opcode == OP_CBRA || opcode == OP_SCBRA)
7895   {
7896   /* Capturing brackets has a pre-allocated space. */
7897   offset = GET2(ccbegin, 1 + LINK_SIZE);
7898   if (common->optimized_cbracket[offset] == 0)
7899     {
7900     private_data_ptr = OVECTOR_PRIV(offset);
7901     offset <<= 1;
7902     }
7903   else
7904     {
7905     offset <<= 1;
7906     private_data_ptr = OVECTOR(offset);
7907     }
7908   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
7909   matchingpath += IMM2_SIZE;
7910   }
7911 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
7912   {
7913   /* Other brackets simply allocate the next entry. */
7914   private_data_ptr = PRIVATE_DATA(ccbegin);
7915   SLJIT_ASSERT(private_data_ptr != 0);
7916   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
7917   if (opcode == OP_ONCE)
7918     BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
7919   }
7920 
7921 /* Instructions before the first alternative. */
7922 stacksize = 0;
7923 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
7924   stacksize++;
7925 if (bra == OP_BRAZERO)
7926   stacksize++;
7927 
7928 if (stacksize > 0)
7929   allocate_stack(common, stacksize);
7930 
7931 stacksize = 0;
7932 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
7933   {
7934   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
7935   stacksize++;
7936   }
7937 
7938 if (bra == OP_BRAZERO)
7939   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7940 
7941 if (bra == OP_BRAMINZERO)
7942   {
7943   /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
7944   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7945   if (ket != OP_KETRMIN)
7946     {
7947     free_stack(common, 1);
7948     braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7949     }
7950   else
7951     {
7952     if (opcode == OP_ONCE || opcode >= OP_SBRA)
7953       {
7954       jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7955       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
7956       /* Nothing stored during the first run. */
7957       skip = JUMP(SLJIT_JUMP);
7958       JUMPHERE(jump);
7959       /* Checking zero-length iteration. */
7960       if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
7961         {
7962         /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
7963         braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7964         }
7965       else
7966         {
7967         /* Except when the whole stack frame must be saved. */
7968         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7969         braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
7970         }
7971       JUMPHERE(skip);
7972       }
7973     else
7974       {
7975       jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7976       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
7977       JUMPHERE(jump);
7978       }
7979     }
7980   }
7981 
7982 if (repeat_type != 0)
7983   {
7984   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
7985   if (repeat_type == OP_EXACT)
7986     rmax_label = LABEL();
7987   }
7988 
7989 if (ket == OP_KETRMIN)
7990   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
7991 
7992 if (ket == OP_KETRMAX)
7993   {
7994   rmax_label = LABEL();
7995   if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
7996     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
7997   }
7998 
7999 /* Handling capturing brackets and alternatives. */
8000 if (opcode == OP_ONCE)
8001   {
8002   stacksize = 0;
8003   if (needs_control_head)
8004     {
8005     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8006     stacksize++;
8007     }
8008 
8009   if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
8010     {
8011     /* Neither capturing brackets nor recursions are found in the block. */
8012     if (ket == OP_KETRMIN)
8013       {
8014       stacksize += 2;
8015       if (!needs_control_head)
8016         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8017       }
8018     else
8019       {
8020       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
8021         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
8022       if (ket == OP_KETRMAX || has_alternatives)
8023         stacksize++;
8024       }
8025 
8026     if (stacksize > 0)
8027       allocate_stack(common, stacksize);
8028 
8029     stacksize = 0;
8030     if (needs_control_head)
8031       {
8032       stacksize++;
8033       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8034       }
8035 
8036     if (ket == OP_KETRMIN)
8037       {
8038       if (needs_control_head)
8039         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8040       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8041       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
8042         OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
8043       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
8044       }
8045     else if (ket == OP_KETRMAX || has_alternatives)
8046       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8047     }
8048   else
8049     {
8050     if (ket != OP_KET || has_alternatives)
8051       stacksize++;
8052 
8053     stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
8054     allocate_stack(common, stacksize);
8055 
8056     if (needs_control_head)
8057       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8058 
8059     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8060     OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
8061 
8062     stacksize = needs_control_head ? 1 : 0;
8063     if (ket != OP_KET || has_alternatives)
8064       {
8065       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8066       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
8067       stacksize++;
8068       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
8069       }
8070     else
8071       {
8072       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
8073       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
8074       }
8075     init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
8076     }
8077   }
8078 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
8079   {
8080   /* Saving the previous values. */
8081   if (common->optimized_cbracket[offset >> 1] != 0)
8082     {
8083     SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
8084     allocate_stack(common, 2);
8085     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8086     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
8087     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
8088     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
8089     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
8090     }
8091   else
8092     {
8093     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8094     allocate_stack(common, 1);
8095     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
8096     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8097     }
8098   }
8099 else if (opcode == OP_SBRA || opcode == OP_SCOND)
8100   {
8101   /* Saving the previous value. */
8102   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8103   allocate_stack(common, 1);
8104   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
8105   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8106   }
8107 else if (has_alternatives)
8108   {
8109   /* Pushing the starting string pointer. */
8110   allocate_stack(common, 1);
8111   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8112   }
8113 
8114 /* Generating code for the first alternative. */
8115 if (opcode == OP_COND || opcode == OP_SCOND)
8116   {
8117   if (*matchingpath == OP_CREF)
8118     {
8119     SLJIT_ASSERT(has_alternatives);
8120     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
8121       CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
8122     matchingpath += 1 + IMM2_SIZE;
8123     }
8124   else if (*matchingpath == OP_DNCREF)
8125     {
8126     SLJIT_ASSERT(has_alternatives);
8127 
8128     i = GET2(matchingpath, 1 + IMM2_SIZE);
8129     slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
8130     OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
8131     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
8132     OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
8133     slot += common->name_entry_size;
8134     i--;
8135     while (i-- > 0)
8136       {
8137       OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
8138       OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0);
8139       slot += common->name_entry_size;
8140       }
8141     OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
8142     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
8143     matchingpath += 1 + 2 * IMM2_SIZE;
8144     }
8145   else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
8146     {
8147     /* Never has other case. */
8148     BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
8149     SLJIT_ASSERT(!has_alternatives);
8150 
8151     if (*matchingpath == OP_TRUE)
8152       {
8153       stacksize = 1;
8154       matchingpath++;
8155       }
8156     else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
8157       stacksize = 0;
8158     else if (*matchingpath == OP_RREF)
8159       {
8160       stacksize = GET2(matchingpath, 1);
8161       if (common->currententry == NULL)
8162         stacksize = 0;
8163       else if (stacksize == RREF_ANY)
8164         stacksize = 1;
8165       else if (common->currententry->start == 0)
8166         stacksize = stacksize == 0;
8167       else
8168         stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
8169 
8170       if (stacksize != 0)
8171         matchingpath += 1 + IMM2_SIZE;
8172       }
8173     else
8174       {
8175       if (common->currententry == NULL || common->currententry->start == 0)
8176         stacksize = 0;
8177       else
8178         {
8179         stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
8180         slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
8181         i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
8182         while (stacksize > 0)
8183           {
8184           if ((int)GET2(slot, 0) == i)
8185             break;
8186           slot += common->name_entry_size;
8187           stacksize--;
8188           }
8189         }
8190 
8191       if (stacksize != 0)
8192         matchingpath += 1 + 2 * IMM2_SIZE;
8193       }
8194 
8195       /* The stacksize == 0 is a common "else" case. */
8196       if (stacksize == 0)
8197         {
8198         if (*cc == OP_ALT)
8199           {
8200           matchingpath = cc + 1 + LINK_SIZE;
8201           cc += GET(cc, 1);
8202           }
8203         else
8204           matchingpath = cc;
8205         }
8206     }
8207   else
8208     {
8209     SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
8210     /* Similar code as PUSH_BACKTRACK macro. */
8211     assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
8212     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8213       return NULL;
8214     memset(assert, 0, sizeof(assert_backtrack));
8215     assert->common.cc = matchingpath;
8216     BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
8217     matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
8218     }
8219   }
8220 
8221 compile_matchingpath(common, matchingpath, cc, backtrack);
8222 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8223   return NULL;
8224 
8225 if (opcode == OP_ONCE)
8226   match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
8227 
8228 stacksize = 0;
8229 if (repeat_type == OP_MINUPTO)
8230   {
8231   /* We need to preserve the counter. TMP2 will be used below. */
8232   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
8233   stacksize++;
8234   }
8235 if (ket != OP_KET || bra != OP_BRA)
8236   stacksize++;
8237 if (offset != 0)
8238   {
8239   if (common->capture_last_ptr != 0)
8240     stacksize++;
8241   if (common->optimized_cbracket[offset >> 1] == 0)
8242     stacksize += 2;
8243   }
8244 if (has_alternatives && opcode != OP_ONCE)
8245   stacksize++;
8246 
8247 if (stacksize > 0)
8248   allocate_stack(common, stacksize);
8249 
8250 stacksize = 0;
8251 if (repeat_type == OP_MINUPTO)
8252   {
8253   /* TMP2 was set above. */
8254   OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
8255   stacksize++;
8256   }
8257 
8258 if (ket != OP_KET || bra != OP_BRA)
8259   {
8260   if (ket != OP_KET)
8261     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8262   else
8263     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
8264   stacksize++;
8265   }
8266 
8267 if (offset != 0)
8268   stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
8269 
8270 if (has_alternatives)
8271   {
8272   if (opcode != OP_ONCE)
8273     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
8274   if (ket != OP_KETRMAX)
8275     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
8276   }
8277 
8278 /* Must be after the matchingpath label. */
8279 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
8280   {
8281   SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
8282   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
8283   }
8284 
8285 if (ket == OP_KETRMAX)
8286   {
8287   if (repeat_type != 0)
8288     {
8289     if (has_alternatives)
8290       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
8291     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
8292     JUMPTO(SLJIT_NOT_ZERO, rmax_label);
8293     /* Drop STR_PTR for greedy plus quantifier. */
8294     if (opcode != OP_ONCE)
8295       free_stack(common, 1);
8296     }
8297   else if (opcode == OP_ONCE || opcode >= OP_SBRA)
8298     {
8299     if (has_alternatives)
8300       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
8301     /* Checking zero-length iteration. */
8302     if (opcode != OP_ONCE)
8303       {
8304       CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
8305       /* Drop STR_PTR for greedy plus quantifier. */
8306       if (bra != OP_BRAZERO)
8307         free_stack(common, 1);
8308       }
8309     else
8310       /* TMP2 must contain the starting STR_PTR. */
8311       CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
8312     }
8313   else
8314     JUMPTO(SLJIT_JUMP, rmax_label);
8315   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
8316   }
8317 
8318 if (repeat_type == OP_EXACT)
8319   {
8320   count_match(common);
8321   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
8322   JUMPTO(SLJIT_NOT_ZERO, rmax_label);
8323   }
8324 else if (repeat_type == OP_UPTO)
8325   {
8326   /* We need to preserve the counter. */
8327   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
8328   allocate_stack(common, 1);
8329   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8330   }
8331 
8332 if (bra == OP_BRAZERO)
8333   BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
8334 
8335 if (bra == OP_BRAMINZERO)
8336   {
8337   /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
8338   JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
8339   if (braminzero != NULL)
8340     {
8341     JUMPHERE(braminzero);
8342     /* We need to release the end pointer to perform the
8343     backtrack for the zero-length iteration. When
8344     framesize is < 0, OP_ONCE will do the release itself. */
8345     if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
8346       {
8347       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8348       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
8349       }
8350     else if (ket == OP_KETRMIN && opcode != OP_ONCE)
8351       free_stack(common, 1);
8352     }
8353   /* Continue to the normal backtrack. */
8354   }
8355 
8356 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
8357   count_match(common);
8358 
8359 /* Skip the other alternatives. */
8360 while (*cc == OP_ALT)
8361   cc += GET(cc, 1);
8362 cc += 1 + LINK_SIZE;
8363 
8364 if (opcode == OP_ONCE)
8365   {
8366   /* We temporarily encode the needs_control_head in the lowest bit.
8367      Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
8368      the same value for small signed numbers (including negative numbers). */
8369   BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
8370   }
8371 return cc + repeat_length;
8372 }
8373 
/* Generates the matching path of a repeated group whose opening opcode is
OP_BRAPOS, OP_SBRAPOS, OP_CBRAPOS or OP_SCBRAPOS (closed by OP_KETRPOS),
optionally preceded by OP_BRAPOSZERO.

Every alternative is compiled together with its own backtracking path inside
the loop below: a successful alternative records the new position and jumps
back to the loop head, a failing one reloads STR_PTR from the last recorded
position and falls through to the next alternative.

When OP_BRAPOSZERO is absent, an extra stack slot is initialised to 1 and
cleared after the first successful iteration; if it is still non-zero when
all alternatives have failed, a jump is added to backtrack->topbacktracks.

Arguments:
  common   compiler state
  cc       points at OP_BRAPOSZERO or at the bracket opcode itself
  parent   not referenced directly in this function; NOTE(review): presumably
           consumed by the PUSH_BACKTRACK macro - confirm

Returns:   the address following OP_KETRPOS and its link field, or NULL when
           the sljit compiler reports an error
*/
static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode;
int private_data_ptr;
int cbraprivptr = 0;
BOOL needs_control_head;
int framesize;
int stacksize;
int offset = 0;
BOOL zero = FALSE;
PCRE2_SPTR ccbegin = NULL;
int stack; /* Also contains the offset of control head. */
struct sljit_label *loop = NULL;
struct jump_list *emptymatch = NULL;

PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
if (*cc == OP_BRAPOSZERO)
  {
  zero = TRUE;
  cc++;
  }

opcode = *cc;
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
switch(opcode)
  {
  case OP_BRAPOS:
  case OP_SBRAPOS:
  ccbegin = cc + 1 + LINK_SIZE;
  break;

  case OP_CBRAPOS:
  case OP_SCBRAPOS:
  offset = GET2(cc, 1 + LINK_SIZE);
  /* This case cannot be optimized in the same way as
  normal capturing brackets. */
  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
  cbraprivptr = OVECTOR_PRIV(offset);
  /* From here on offset indexes the ovector pair of the capture. */
  offset <<= 1;
  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
  break;

  default:
  SLJIT_ASSERT_STOP();
  break;
  }

framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
if (framesize < 0)
  {
  /* No frame is initialised in this branch. Slots, in order: the saved
  ovector pair (plus capture_last when it is tracked) or the current STR_PTR;
  then the control head if needed; then the "nothing matched yet" flag when
  OP_BRAPOSZERO is absent. */
  if (offset != 0)
    {
    stacksize = 2;
    if (common->capture_last_ptr != 0)
      stacksize++;
    }
  else
    stacksize = 1;

  if (needs_control_head)
    stacksize++;
  if (!zero)
    stacksize++;

  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  allocate_stack(common, stacksize);
  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  stack = 0;
  if (offset != 0)
    {
    stack = 2;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    if (common->capture_last_ptr != 0)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
      stack = 3;
      }
    }
  else
    {
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    stack = 1;
    }

  /* The flag goes one slot past the control head; afterwards stack is moved
  back so that it names the control head slot (TMP2 was loaded above). */
  if (needs_control_head)
    stack++;
  if (!zero)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
  if (needs_control_head)
    {
    stack--;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    }
  }
else
  {
  /* A frame is initialised in this branch. Slots, in order: the "nothing
  matched yet" flag (only without OP_BRAPOSZERO), the control head if needed,
  STR_PTR for non-capturing groups, the previous private data value, and
  then the frame written by init_frame(). */
  stacksize = framesize + 1;
  if (!zero)
    stacksize++;
  if (needs_control_head)
    stacksize++;
  if (offset == 0)
    stacksize++;
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;

  allocate_stack(common, stacksize);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));

  stack = 0;
  if (!zero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
    stack = 1;
    }
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    stack++;
    }
  if (offset == 0)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
    stack++;
    }
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
  /* Step back over the slots written after the control head, so that stack
  names the control head slot when one was stored. */
  stack -= 1 + (offset == 0);
  }

/* For capturing groups the start of the current iteration is kept in the
private ovector slot. */
if (offset != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);

loop = LABEL();
while (*cc != OP_KETRPOS)
  {
  backtrack->top = NULL;
  backtrack->topbacktracks = NULL;
  cc += GET(cc, 1);

  compile_matchingpath(common, ccbegin, cc, backtrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  /* The alternative matched: record the new position and iterate again. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

    if (offset != 0)
      {
      /* TMP1 receives the start of this iteration; it becomes the capture
      start while STR_PTR becomes the capture end. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      /* TMP1 is only needed for the empty-iteration test below. */
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    /* Leave the loop when the iteration did not advance STR_PTR. */
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* Clear the "nothing matched yet" flag. */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
    }
  else
    {
    if (offset != 0)
      {
      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    /* Leave the loop when the iteration did not advance STR_PTR. */
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* framesize >= 0 in this branch, so the "nothing matched yet" flag is
    the one stored at STACK(0) during setup. */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    }

  JUMPTO(SLJIT_JUMP, loop);
  flush_stubs(common);

  /* Backtracking path of this alternative; every failure ends up at the
  label set right after it. */
  compile_backtrackingpath(common, backtrack->top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;
  set_jumps(backtrack->topbacktracks, LABEL());

  /* Reload the last recorded position before trying the next alternative. */
  if (framesize < 0)
    {
    if (offset != 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
    else
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    }
  else
    {
    if (offset != 0)
      {
      /* Last alternative. TMP2 is needed by the final flag test below. */
      if (*cc == OP_KETRPOS)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
      }
    }

  if (*cc == OP_KETRPOS)
    break;
  ccbegin = cc + 1 + LINK_SIZE;
  }

/* We don't have to restore the control head in case of a failed match. */

backtrack->topbacktracks = NULL;
if (!zero)
  {
  /* The flag is still non-zero when no iteration has succeeded. */
  if (framesize < 0)
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
  else /* TMP2 is set to [private_data_ptr] above. */
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
  }

/* None of them matched. */
set_jumps(emptymatch, LABEL());
count_match(common);
return cc + 1 + LINK_SIZE;
}
8656 
get_iterator_parameters(compiler_common * common,PCRE2_SPTR cc,PCRE2_UCHAR * opcode,PCRE2_UCHAR * type,sljit_u32 * max,sljit_u32 * exact,PCRE2_SPTR * end)8657 static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
8658 {
8659 int class_len;
8660 
8661 *opcode = *cc;
8662 *exact = 0;
8663 
8664 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
8665   {
8666   cc++;
8667   *type = OP_CHAR;
8668   }
8669 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
8670   {
8671   cc++;
8672   *type = OP_CHARI;
8673   *opcode -= OP_STARI - OP_STAR;
8674   }
8675 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
8676   {
8677   cc++;
8678   *type = OP_NOT;
8679   *opcode -= OP_NOTSTAR - OP_STAR;
8680   }
8681 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
8682   {
8683   cc++;
8684   *type = OP_NOTI;
8685   *opcode -= OP_NOTSTARI - OP_STAR;
8686   }
8687 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
8688   {
8689   cc++;
8690   *opcode -= OP_TYPESTAR - OP_STAR;
8691   *type = OP_END;
8692   }
8693 else
8694   {
8695   SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
8696   *type = *opcode;
8697   cc++;
8698   class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
8699   *opcode = cc[class_len - 1];
8700 
8701   if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
8702     {
8703     *opcode -= OP_CRSTAR - OP_STAR;
8704     *end = cc + class_len;
8705 
8706     if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
8707       {
8708       *exact = 1;
8709       *opcode -= OP_PLUS - OP_STAR;
8710       }
8711     }
8712   else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
8713     {
8714     *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
8715     *end = cc + class_len;
8716 
8717     if (*opcode == OP_POSPLUS)
8718       {
8719       *exact = 1;
8720       *opcode = OP_POSSTAR;
8721       }
8722     }
8723   else
8724     {
8725     SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
8726     *max = GET2(cc, (class_len + IMM2_SIZE));
8727     *exact = GET2(cc, class_len);
8728 
8729     if (*max == 0)
8730       {
8731       if (*opcode == OP_CRPOSRANGE)
8732         *opcode = OP_POSSTAR;
8733       else
8734         *opcode -= OP_CRRANGE - OP_STAR;
8735       }
8736     else
8737       {
8738       *max -= *exact;
8739       if (*max == 0)
8740         *opcode = OP_EXACT;
8741       else if (*max == 1)
8742         {
8743         if (*opcode == OP_CRPOSRANGE)
8744           *opcode = OP_POSQUERY;
8745         else
8746           *opcode -= OP_CRRANGE - OP_QUERY;
8747         }
8748       else
8749         {
8750         if (*opcode == OP_CRPOSRANGE)
8751           *opcode = OP_POSUPTO;
8752         else
8753           *opcode -= OP_CRRANGE - OP_UPTO;
8754         }
8755       }
8756     *end = cc + class_len + 2 * IMM2_SIZE;
8757     }
8758   return cc;
8759   }
8760 
8761 switch(*opcode)
8762   {
8763   case OP_EXACT:
8764   *exact = GET2(cc, 0);
8765   cc += IMM2_SIZE;
8766   break;
8767 
8768   case OP_PLUS:
8769   case OP_MINPLUS:
8770   *exact = 1;
8771   *opcode -= OP_PLUS - OP_STAR;
8772   break;
8773 
8774   case OP_POSPLUS:
8775   *exact = 1;
8776   *opcode = OP_POSSTAR;
8777   break;
8778 
8779   case OP_UPTO:
8780   case OP_MINUPTO:
8781   case OP_POSUPTO:
8782   *max = GET2(cc, 0);
8783   cc += IMM2_SIZE;
8784   break;
8785   }
8786 
8787 if (*type == OP_END)
8788   {
8789   *type = *cc;
8790   *end = next_opcode(common, cc);
8791   cc++;
8792   return cc;
8793   }
8794 
8795 *end = cc + 1;
8796 #ifdef SUPPORT_UNICODE
8797 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
8798 #endif
8799 return cc;
8800 }
8801 
compile_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)8802 static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
8803 {
8804 DEFINE_COMPILER;
8805 backtrack_common *backtrack;
8806 PCRE2_UCHAR opcode;
8807 PCRE2_UCHAR type;
8808 sljit_u32 max = 0, exact;
8809 BOOL fast_fail;
8810 sljit_s32 fast_str_ptr;
8811 BOOL charpos_enabled;
8812 PCRE2_UCHAR charpos_char;
8813 unsigned int charpos_othercasebit;
8814 PCRE2_SPTR end;
8815 jump_list *no_match = NULL;
8816 jump_list *no_char1_match = NULL;
8817 struct sljit_jump *jump = NULL;
8818 struct sljit_label *label;
8819 int private_data_ptr = PRIVATE_DATA(cc);
8820 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
8821 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
8822 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
8823 int tmp_base, tmp_offset;
8824 
8825 PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
8826 
8827 fast_str_ptr = PRIVATE_DATA(cc + 1);
8828 fast_fail = TRUE;
8829 
8830 SLJIT_ASSERT(common->fast_forward_bc_ptr == NULL || fast_str_ptr == 0 || cc == common->fast_forward_bc_ptr);
8831 
8832 if (cc == common->fast_forward_bc_ptr)
8833   fast_fail = FALSE;
8834 else if (common->fast_fail_start_ptr == 0)
8835   fast_str_ptr = 0;
8836 
8837 SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || fast_str_ptr == 0
8838   || (fast_str_ptr >= common->fast_fail_start_ptr && fast_str_ptr <= common->fast_fail_end_ptr));
8839 
8840 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
8841 
8842 if (type != OP_EXTUNI)
8843   {
8844   tmp_base = TMP3;
8845   tmp_offset = 0;
8846   }
8847 else
8848   {
8849   tmp_base = SLJIT_MEM1(SLJIT_SP);
8850   tmp_offset = POSSESSIVE0;
8851   }
8852 
8853 if (fast_fail && fast_str_ptr != 0)
8854   add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr));
8855 
8856 /* Handle fixed part first. */
8857 if (exact > 1)
8858   {
8859   SLJIT_ASSERT(fast_str_ptr == 0);
8860   if (common->mode == PCRE2_JIT_COMPLETE
8861 #ifdef SUPPORT_UNICODE
8862       && !common->utf
8863 #endif
8864       )
8865     {
8866     OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
8867     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
8868     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
8869     label = LABEL();
8870     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
8871     OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8872     JUMPTO(SLJIT_NOT_ZERO, label);
8873     }
8874   else
8875     {
8876     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
8877     label = LABEL();
8878     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
8879     OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8880     JUMPTO(SLJIT_NOT_ZERO, label);
8881     }
8882   }
8883 else if (exact == 1)
8884   compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
8885 
8886 switch(opcode)
8887   {
8888   case OP_STAR:
8889   case OP_UPTO:
8890   SLJIT_ASSERT(fast_str_ptr == 0 || opcode == OP_STAR);
8891 
8892   if (type == OP_ANYNL || type == OP_EXTUNI)
8893     {
8894     SLJIT_ASSERT(private_data_ptr == 0);
8895     SLJIT_ASSERT(fast_str_ptr == 0);
8896 
8897     allocate_stack(common, 2);
8898     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8899     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
8900 
8901     if (opcode == OP_UPTO)
8902       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
8903 
8904     label = LABEL();
8905     compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
8906     if (opcode == OP_UPTO)
8907       {
8908       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
8909       OP2(SLJIT_SUB | SLJIT_SET_E, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
8910       jump = JUMP(SLJIT_ZERO);
8911       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
8912       }
8913 
8914     /* We cannot use TMP3 because of this allocate_stack. */
8915     allocate_stack(common, 1);
8916     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8917     JUMPTO(SLJIT_JUMP, label);
8918     if (jump != NULL)
8919       JUMPHERE(jump);
8920     }
8921   else
8922     {
8923     charpos_enabled = FALSE;
8924     charpos_char = 0;
8925     charpos_othercasebit = 0;
8926 
8927     if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
8928       {
8929       charpos_enabled = TRUE;
8930 #ifdef SUPPORT_UNICODE
8931       charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
8932 #endif
8933       if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
8934         {
8935         charpos_othercasebit = char_get_othercase_bit(common, end + 1);
8936         if (charpos_othercasebit == 0)
8937           charpos_enabled = FALSE;
8938         }
8939 
8940       if (charpos_enabled)
8941         {
8942         charpos_char = end[1];
8943         /* Consume the OP_CHAR opcode. */
8944         end += 2;
8945 #if PCRE2_CODE_UNIT_WIDTH == 8
8946         SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
8947 #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
8948         SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
8949         if ((charpos_othercasebit & 0x100) != 0)
8950           charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
8951 #endif
8952         if (charpos_othercasebit != 0)
8953           charpos_char |= charpos_othercasebit;
8954 
8955         BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
8956         BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
8957         BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
8958         }
8959       }
8960 
8961     if (charpos_enabled)
8962       {
8963       if (opcode == OP_UPTO)
8964         OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
8965 
8966       /* Search the first instance of charpos_char. */
8967       jump = JUMP(SLJIT_JUMP);
8968       label = LABEL();
8969       if (opcode == OP_UPTO)
8970         {
8971         OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8972         add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
8973         }
8974       compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
8975       if (fast_str_ptr != 0)
8976         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
8977       JUMPHERE(jump);
8978 
8979       detect_partial_match(common, &backtrack->topbacktracks);
8980       OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8981       if (charpos_othercasebit != 0)
8982         OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
8983       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
8984 
8985       if (private_data_ptr == 0)
8986         allocate_stack(common, 2);
8987       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8988       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
8989       if (opcode == OP_UPTO)
8990         {
8991         OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8992         add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
8993         }
8994 
8995       /* Search the last instance of charpos_char. */
8996       label = LABEL();
8997       compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
8998       if (fast_str_ptr != 0)
8999         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9000       detect_partial_match(common, &no_match);
9001       OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
9002       if (charpos_othercasebit != 0)
9003         OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
9004       if (opcode == OP_STAR)
9005         {
9006         CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
9007         OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9008         }
9009       else
9010         {
9011         jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
9012         OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9013         JUMPHERE(jump);
9014         }
9015 
9016       if (opcode == OP_UPTO)
9017         {
9018         OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9019         JUMPTO(SLJIT_NOT_ZERO, label);
9020         }
9021       else
9022         JUMPTO(SLJIT_JUMP, label);
9023 
9024       set_jumps(no_match, LABEL());
9025       OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9026       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9027       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9028       }
9029 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
9030     else if (common->utf)
9031       {
9032       if (private_data_ptr == 0)
9033         allocate_stack(common, 2);
9034 
9035       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9036       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
9037 
9038       if (opcode == OP_UPTO)
9039         OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9040 
9041       label = LABEL();
9042       compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9043       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9044 
9045       if (opcode == OP_UPTO)
9046         {
9047         OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9048         JUMPTO(SLJIT_NOT_ZERO, label);
9049         }
9050       else
9051         JUMPTO(SLJIT_JUMP, label);
9052 
9053       set_jumps(no_match, LABEL());
9054       OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9055       if (fast_str_ptr != 0)
9056         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9057       }
9058 #endif
9059     else
9060       {
9061       if (private_data_ptr == 0)
9062         allocate_stack(common, 2);
9063 
9064       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
9065       if (opcode == OP_UPTO)
9066         OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9067 
9068       label = LABEL();
9069       detect_partial_match(common, &no_match);
9070       compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
9071       if (opcode == OP_UPTO)
9072         {
9073         OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9074         JUMPTO(SLJIT_NOT_ZERO, label);
9075         OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9076         }
9077       else
9078         JUMPTO(SLJIT_JUMP, label);
9079 
9080       set_jumps(no_char1_match, LABEL());
9081       OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9082       set_jumps(no_match, LABEL());
9083       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9084       if (fast_str_ptr != 0)
9085         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9086       }
9087     }
9088   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9089   break;
9090 
9091   case OP_MINSTAR:
9092   if (private_data_ptr == 0)
9093     allocate_stack(common, 1);
9094   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9095   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9096   if (fast_str_ptr != 0)
9097     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9098   break;
9099 
9100   case OP_MINUPTO:
9101   SLJIT_ASSERT(fast_str_ptr == 0);
9102   if (private_data_ptr == 0)
9103     allocate_stack(common, 2);
9104   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9105   OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
9106   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9107   break;
9108 
9109   case OP_QUERY:
9110   case OP_MINQUERY:
9111   SLJIT_ASSERT(fast_str_ptr == 0);
9112   if (private_data_ptr == 0)
9113     allocate_stack(common, 1);
9114   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9115   if (opcode == OP_QUERY)
9116     compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
9117   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9118   break;
9119 
9120   case OP_EXACT:
9121   break;
9122 
9123   case OP_POSSTAR:
9124 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
9125   if (common->utf)
9126     {
9127     OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9128     label = LABEL();
9129     compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9130     OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9131     JUMPTO(SLJIT_JUMP, label);
9132     set_jumps(no_match, LABEL());
9133     OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
9134     if (fast_str_ptr != 0)
9135       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9136     break;
9137     }
9138 #endif
9139   label = LABEL();
9140   detect_partial_match(common, &no_match);
9141   compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
9142   JUMPTO(SLJIT_JUMP, label);
9143   set_jumps(no_char1_match, LABEL());
9144   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9145   set_jumps(no_match, LABEL());
9146   if (fast_str_ptr != 0)
9147     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9148   break;
9149 
9150   case OP_POSUPTO:
9151   SLJIT_ASSERT(fast_str_ptr == 0);
9152 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
9153   if (common->utf)
9154     {
9155     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
9156     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9157     label = LABEL();
9158     compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9159     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
9160     OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9161     JUMPTO(SLJIT_NOT_ZERO, label);
9162     set_jumps(no_match, LABEL());
9163     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
9164     break;
9165     }
9166 #endif
9167   OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9168   label = LABEL();
9169   detect_partial_match(common, &no_match);
9170   compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
9171   OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9172   JUMPTO(SLJIT_NOT_ZERO, label);
9173   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9174   set_jumps(no_char1_match, LABEL());
9175   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9176   set_jumps(no_match, LABEL());
9177   break;
9178 
9179   case OP_POSQUERY:
9180   SLJIT_ASSERT(fast_str_ptr == 0);
9181   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9182   compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9183   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9184   set_jumps(no_match, LABEL());
9185   OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
9186   break;
9187 
9188   default:
9189   SLJIT_ASSERT_STOP();
9190   break;
9191   }
9192 
9193 count_match(common);
9194 return end;
9195 }
9196 
/* Emit the matching path for OP_FAIL, OP_ACCEPT and OP_ASSERT_ACCEPT.

OP_FAIL emits an unconditional jump that is queued on this item's
topbacktracks list. The accept opcodes transfer control to the accept target:
a direct jump to common->accept_label when that label already exists,
otherwise the jump is queued on the common->accept list. Unless the notempty
conditions are known not to matter, run-time checks of PCRE2_NOTEMPTY and
PCRE2_NOTEMPTY_ATSTART are emitted before accepting.

Arguments:
  common      the compiler state
  cc          points to the opcode being compiled
  parent      not referenced directly in this body
              NOTE(review): presumably consumed by the PUSH_BACKTRACK macro,
              whose definition is outside this excerpt - confirm

Returns:      cc + 1 on every path visible here
              NOTE(review): PUSH_BACKTRACK is passed NULL as its third
              argument, presumably the value returned if the push fails
*/
compile_fail_accept_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9197 static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9198 {
9199 DEFINE_COMPILER;
9200 backtrack_common *backtrack;
9201 
9202 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9203 
     /* OP_FAIL: unconditionally take the backtracking path. */
9204 if (*cc == OP_FAIL)
9205   {
9206   add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
9207   return cc + 1;
9208   }
9209 
9210 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
9211   {
9212   /* No need to check notempty conditions. */
9213   if (common->accept_label == NULL)
9214     add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
9215   else
9216     JUMPTO(SLJIT_JUMP, common->accept_label);
9217   return cc + 1;
9218   }
9219 
     /* Accept immediately when STR_PTR differs from the value stored at
     OVECTOR(0). NOTE(review): presumably OVECTOR(0) holds the match start, so
     this is the "match is not empty" test - confirm. */
9220 if (common->accept_label == NULL)
9221   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
9222 else
9223   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
     /* Load the options word of jit_arguments into TMP2. If PCRE2_NOTEMPTY is
     set, backtrack. */
9224 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9225 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
9226 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
9227 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO));
     /* If PCRE2_NOTEMPTY_ATSTART is clear as well, accept. */
9228 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
9229 if (common->accept_label == NULL)
9230   add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
9231 else
9232   JUMPTO(SLJIT_ZERO, common->accept_label);
     /* PCRE2_NOTEMPTY_ATSTART is set: accept only when STR_PTR differs from the
     str field of jit_arguments, otherwise backtrack. */
9233 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
9234 if (common->accept_label == NULL)
9235   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
9236 else
9237   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
9238 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
9239 return cc + 1;
9240 }
9241 
/* Emit the matching path for OP_CLOSE.

The group number is read from the code with GET2(cc, 1). Unless the code is
being generated inside a recursion entry (common->currententry != NULL), the
current STR_PTR is stored into OVECTOR(2 * group + 1). When the group is not
flagged in common->optimized_cbracket, the value kept in OVECTOR_PRIV(group)
is additionally copied into OVECTOR(2 * group).

Arguments:
  common      the compiler state
  cc          points to the OP_CLOSE opcode

Returns:      cc + 1 + IMM2_SIZE (the opcode plus its group-number operand)
*/
compile_close_matchingpath(compiler_common * common,PCRE2_SPTR cc)9242 static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
9243 {
9244 DEFINE_COMPILER;
9245 int offset = GET2(cc, 1);
9246 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
9247 
9248 /* Data will be discarded anyway... */
9249 if (common->currententry != NULL)
9250   return cc + 1 + IMM2_SIZE;
9251 
     /* The private value is fetched into TMP1 before anything is stored. */
9252 if (!optimized_cbracket)
9253   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
     /* From here on, offset indexes the pair OVECTOR(offset), OVECTOR(offset + 1). */
9254 offset <<= 1;
9255 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
9256 if (!optimized_cbracket)
9257   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
9258 return cc + 1 + IMM2_SIZE;
9259 }
9260 
/* Emit the matching path for a backtracking control verb.

A backtrack_common item is pushed for every verb. In addition:
  OP_SKIP                    one stack slot is allocated and STR_PTR is
                             saved in it
  OP_PRUNE_ARG, OP_THEN_ARG  the address cc + 2 is written to the local slot
                             common->mark_ptr and to the mark_ptr field of
                             jit_arguments
No further code is emitted here for the other verbs.

Arguments:
  common      the compiler state
  cc          points to the verb opcode
  parent      not referenced directly in this body
              NOTE(review): presumably consumed by the PUSH_BACKTRACK macro,
              whose definition is outside this excerpt - confirm

Returns:      pointer to the code following the verb: cc + 1, plus 2 + cc[1]
              for the _ARG variants
              NOTE(review): cc[1] looks like the length of the argument
              name - confirm
*/
compile_control_verb_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9261 static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9262 {
9263 DEFINE_COMPILER;
9264 backtrack_common *backtrack;
9265 PCRE2_UCHAR opcode = *cc;
9266 PCRE2_SPTR ccend = cc + 1;
9267 
9268 if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
9269   ccend += 2 + cc[1];
9270 
9271 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9272 
     /* Plain OP_SKIP: remember the current subject position on the stack. */
9273 if (opcode == OP_SKIP)
9274   {
9275   allocate_stack(common, 1);
9276   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9277   return ccend;
9278   }
9279 
     /* Record cc + 2 as the current mark, both locally and in jit_arguments. */
9280 if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
9281   {
9282   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9283   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
9284   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
9285   OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
9286   }
9287 
9288 return ccend;
9289 }
9290 
/* One-element code array holding OP_THEN_TRAP. The common.cc pointer of every
then_trap_backtrack item is set to this array, so that reading *cc on such an
item yields OP_THEN_TRAP. */
9291 static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
9292 
/* Push a then_trap_backtrack item and emit the code that installs it.

The new item becomes common->then_trap. The emitted code allocates
3 + max(framesize, 0) stack slots, makes the local common->control_head_ptr
refer to the new entry, and fills the three highest-numbered slots of the
allocation with the trap's start offset, the type_then_trap tag and the
previous control head value. When get_framesize() reports a non-negative
size, init_frame() is called to fill in the frame as well.

Arguments:
  common      the compiler state
  cc          start of the code range covered by the trap
  ccend       end of that code range
  parent      not referenced directly in this body
              NOTE(review): presumably consumed by PUSH_BACKTRACK_NOVALUE,
              whose definition is outside this excerpt - confirm
*/
compile_then_trap_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)9293 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
9294 {
9295 DEFINE_COMPILER;
9296 backtrack_common *backtrack;
9297 BOOL needs_control_head;
9298 int size;
9299 
9300 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
9301 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
9302 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
     /* start is the offset of cc from the beginning of the compiled code. */
9303 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
9304 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
9305 
     /* Three slots are always needed; a negative framesize adds nothing. */
9306 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
9307 size = 3 + (size < 0 ? 0 : size);
9308 
     /* Keep the previous control head in TMP2 while the new one is installed. */
9309 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9310 allocate_stack(common, size);
9311 if (size > 3)
9312   OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
9313 else
9314   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
9315 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
9316 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
9317 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
9318 
     /* size is reused: from here on it is the raw framesize again. */
9319 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
9320 if (size >= 0)
9321   init_frame(common, cc, ccend, size - 1, 0, FALSE);
9322 }
9323 
/* Emit the matching path for the opcodes in the range [cc, ccend).

This is the opcode dispatcher: each opcode is handed to the helper that
compiles it, and the helper returns the address of the next opcode to process.
Helpers that take a jump list for match failures are given the nextbacktracks
list of parent->top when parent->top is not NULL, and parent->topbacktracks
otherwise.

When common->has_then is set and common->then_offsets is non-zero for the
position of cc, the range is bracketed by two then_trap_backtrack items: one
pushed before the first opcode ("tail item") and one pushed after the last
("head item").

The function returns early, without reaching the final assertion, when a
helper returns NULL or when an unknown opcode is met.
NOTE(review): a NULL result presumably signals a compilation failure in the
helper - confirm against the helpers.

Arguments:
  common      the compiler state
  cc          first opcode to compile
  ccend       end of the range; asserted to point at OP_END or at an opcode
              in the range OP_ALT..OP_KETRPOS
  parent      backtrack item that receives the items pushed here
*/
compile_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)9324 static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
9325 {
9326 DEFINE_COMPILER;
9327 backtrack_common *backtrack;
9328 BOOL has_then_trap = FALSE;
9329 then_trap_backtrack *save_then_trap = NULL;
9330 
9331 SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
9332 
9333 if (common->has_then && common->then_offsets[cc - common->start] != 0)
9334   {
9335   SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
9336   has_then_trap = TRUE;
       /* The enclosing trap is restored after the loop below. */
9337   save_then_trap = common->then_trap;
9338   /* Tail item on backtrack. */
9339   compile_then_trap_matchingpath(common, cc, ccend, parent);
9340   }
9341 
9342 while (cc < ccend)
9343   {
9344   switch(*cc)
9345     {
9346     case OP_SOD:
9347     case OP_SOM:
9348     case OP_NOT_WORD_BOUNDARY:
9349     case OP_WORD_BOUNDARY:
9350     case OP_EODN:
9351     case OP_EOD:
9352     case OP_DOLL:
9353     case OP_DOLLM:
9354     case OP_CIRC:
9355     case OP_CIRCM:
9356     case OP_REVERSE:
9357     cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
9358     break;
9359 
9360     case OP_NOT_DIGIT:
9361     case OP_DIGIT:
9362     case OP_NOT_WHITESPACE:
9363     case OP_WHITESPACE:
9364     case OP_NOT_WORDCHAR:
9365     case OP_WORDCHAR:
9366     case OP_ANY:
9367     case OP_ALLANY:
9368     case OP_ANYBYTE:
9369     case OP_NOTPROP:
9370     case OP_PROP:
9371     case OP_ANYNL:
9372     case OP_NOT_HSPACE:
9373     case OP_HSPACE:
9374     case OP_NOT_VSPACE:
9375     case OP_VSPACE:
9376     case OP_EXTUNI:
9377     case OP_NOT:
9378     case OP_NOTI:
9379     cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
9380     break;
9381 
         /* OP_SET_SOM: the old OVECTOR(0) value is saved in a new stack slot and
         OVECTOR(0) is overwritten with the current STR_PTR. */
9382     case OP_SET_SOM:
9383     PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
9384     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
9385     allocate_stack(common, 1);
9386     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
9387     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9388     cc++;
9389     break;
9390 
         /* Literal characters use compile_charn_matchingpath only in
         PCRE2_JIT_COMPLETE mode; every other mode compiles them one opcode at
         a time. NOTE(review): the charn variant presumably handles a run of
         characters in one go - confirm. */
9391     case OP_CHAR:
9392     case OP_CHARI:
9393     if (common->mode == PCRE2_JIT_COMPLETE)
9394       cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
9395     else
9396       cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
9397     break;
9398 
         /* All single-item repeat opcodes go to the iterator compiler. */
9399     case OP_STAR:
9400     case OP_MINSTAR:
9401     case OP_PLUS:
9402     case OP_MINPLUS:
9403     case OP_QUERY:
9404     case OP_MINQUERY:
9405     case OP_UPTO:
9406     case OP_MINUPTO:
9407     case OP_EXACT:
9408     case OP_POSSTAR:
9409     case OP_POSPLUS:
9410     case OP_POSQUERY:
9411     case OP_POSUPTO:
9412     case OP_STARI:
9413     case OP_MINSTARI:
9414     case OP_PLUSI:
9415     case OP_MINPLUSI:
9416     case OP_QUERYI:
9417     case OP_MINQUERYI:
9418     case OP_UPTOI:
9419     case OP_MINUPTOI:
9420     case OP_EXACTI:
9421     case OP_POSSTARI:
9422     case OP_POSPLUSI:
9423     case OP_POSQUERYI:
9424     case OP_POSUPTOI:
9425     case OP_NOTSTAR:
9426     case OP_NOTMINSTAR:
9427     case OP_NOTPLUS:
9428     case OP_NOTMINPLUS:
9429     case OP_NOTQUERY:
9430     case OP_NOTMINQUERY:
9431     case OP_NOTUPTO:
9432     case OP_NOTMINUPTO:
9433     case OP_NOTEXACT:
9434     case OP_NOTPOSSTAR:
9435     case OP_NOTPOSPLUS:
9436     case OP_NOTPOSQUERY:
9437     case OP_NOTPOSUPTO:
9438     case OP_NOTSTARI:
9439     case OP_NOTMINSTARI:
9440     case OP_NOTPLUSI:
9441     case OP_NOTMINPLUSI:
9442     case OP_NOTQUERYI:
9443     case OP_NOTMINQUERYI:
9444     case OP_NOTUPTOI:
9445     case OP_NOTMINUPTOI:
9446     case OP_NOTEXACTI:
9447     case OP_NOTPOSSTARI:
9448     case OP_NOTPOSPLUSI:
9449     case OP_NOTPOSQUERYI:
9450     case OP_NOTPOSUPTOI:
9451     case OP_TYPESTAR:
9452     case OP_TYPEMINSTAR:
9453     case OP_TYPEPLUS:
9454     case OP_TYPEMINPLUS:
9455     case OP_TYPEQUERY:
9456     case OP_TYPEMINQUERY:
9457     case OP_TYPEUPTO:
9458     case OP_TYPEMINUPTO:
9459     case OP_TYPEEXACT:
9460     case OP_TYPEPOSSTAR:
9461     case OP_TYPEPOSPLUS:
9462     case OP_TYPEPOSQUERY:
9463     case OP_TYPEPOSUPTO:
9464     cc = compile_iterator_matchingpath(common, cc, parent);
9465     break;
9466 
         /* The opcode that follows the 32 bytes of class data tells whether the
         class is repeated (OP_CRSTAR..OP_CRPOSRANGE) or matched once. */
9467     case OP_CLASS:
9468     case OP_NCLASS:
9469     if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
9470       cc = compile_iterator_matchingpath(common, cc, parent);
9471     else
9472       cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
9473     break;
9474 
9475 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
         /* Same test as above; GET(cc, 1) is used as the offset of the opcode
         that follows the extended class. */
9476     case OP_XCLASS:
9477     if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
9478       cc = compile_iterator_matchingpath(common, cc, parent);
9479     else
9480       cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
9481     break;
9482 #endif
9483 
         /* Back references: repeated ones have their own iterator compiler. */
9484     case OP_REF:
9485     case OP_REFI:
9486     if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
9487       cc = compile_ref_iterator_matchingpath(common, cc, parent);
9488     else
9489       {
9490       compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
9491       cc += 1 + IMM2_SIZE;
9492       }
9493     break;
9494 
         /* OP_DNREF and OP_DNREFI carry two IMM2 operands; an unrepeated
         reference needs compile_dnref_search() before the reference code. */
9495     case OP_DNREF:
9496     case OP_DNREFI:
9497     if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
9498       cc = compile_ref_iterator_matchingpath(common, cc, parent);
9499     else
9500       {
9501       compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
9502       compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
9503       cc += 1 + 2 * IMM2_SIZE;
9504       }
9505     break;
9506 
9507     case OP_RECURSE:
9508     cc = compile_recurse_matchingpath(common, cc, parent);
9509     break;
9510 
9511     case OP_CALLOUT:
9512     case OP_CALLOUT_STR:
9513     cc = compile_callout_matchingpath(common, cc, parent);
9514     break;
9515 
9516     case OP_ASSERT:
9517     case OP_ASSERT_NOT:
9518     case OP_ASSERTBACK:
9519     case OP_ASSERTBACK_NOT:
9520     PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
9521     cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
9522     break;
9523 
         /* OP_BRAMINZERO: no code for the group itself is emitted here; cc is
         moved past the group with bracketend(). One stack slot holding STR_PTR
         is pushed, or, when the group ends with OP_KETRMIN, two slots holding
         0 and STR_PTR. */
9524     case OP_BRAMINZERO:
9525     PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
9526     cc = bracketend(cc + 1);
9527     if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
9528       {
9529       allocate_stack(common, 1);
9530       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9531       }
9532     else
9533       {
9534       allocate_stack(common, 2);
9535       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9536       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
9537       }
9538     BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
9539     count_match(common);
9540     break;
9541 
9542     case OP_ONCE:
9543     case OP_ONCE_NC:
9544     case OP_BRA:
9545     case OP_CBRA:
9546     case OP_COND:
9547     case OP_SBRA:
9548     case OP_SCBRA:
9549     case OP_SCOND:
9550     cc = compile_bracket_matchingpath(common, cc, parent);
9551     break;
9552 
         /* OP_BRAZERO: the next opcode decides between the bracket compiler
         (opcodes above OP_ASSERTBACK_NOT) and the assertion compiler. */
9553     case OP_BRAZERO:
9554     if (cc[1] > OP_ASSERTBACK_NOT)
9555       cc = compile_bracket_matchingpath(common, cc, parent);
9556     else
9557       {
9558       PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
9559       cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
9560       }
9561     break;
9562 
9563     case OP_BRAPOS:
9564     case OP_CBRAPOS:
9565     case OP_SBRAPOS:
9566     case OP_SCBRAPOS:
9567     case OP_BRAPOSZERO:
9568     cc = compile_bracketpos_matchingpath(common, cc, parent);
9569     break;
9570 
         /* OP_MARK: the previous mark pointer is saved on the stack (slot 0, or
         slot 4 of a five-slot entry when common->has_skip_arg is set) and
         cc + 2 becomes the new mark, stored both in the local mark_ptr slot
         and in jit_arguments. With has_skip_arg the entry also becomes the new
         control head: slot 0 = previous control head, slot 1 = type_mark,
         slot 2 = cc + 2, slot 3 = STR_PTR. */
9571     case OP_MARK:
9572     PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
9573     SLJIT_ASSERT(common->mark_ptr != 0);
9574     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
9575     allocate_stack(common, common->has_skip_arg ? 5 : 1);
9576     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9577     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
9578     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
9579     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
9580     OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
9581     if (common->has_skip_arg)
9582       {
9583       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9584       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
9585       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
9586       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
9587       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
9588       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
9589       }
9590     cc += 1 + 2 + cc[1];
9591     break;
9592 
9593     case OP_PRUNE:
9594     case OP_PRUNE_ARG:
9595     case OP_SKIP:
9596     case OP_SKIP_ARG:
9597     case OP_THEN:
9598     case OP_THEN_ARG:
9599     case OP_COMMIT:
9600     cc = compile_control_verb_matchingpath(common, cc, parent);
9601     break;
9602 
9603     case OP_FAIL:
9604     case OP_ACCEPT:
9605     case OP_ASSERT_ACCEPT:
9606     cc = compile_fail_accept_matchingpath(common, cc, parent);
9607     break;
9608 
9609     case OP_CLOSE:
9610     cc = compile_close_matchingpath(common, cc);
9611     break;
9612 
         /* OP_SKIPZERO: the following group is stepped over; nothing is emitted. */
9613     case OP_SKIPZERO:
9614     cc = bracketend(cc + 1);
9615     break;
9616 
9617     default:
9618     SLJIT_ASSERT_STOP();
9619     return;
9620     }
       /* Stop as soon as a helper reports NULL instead of a next opcode. */
9621   if (cc == NULL)
9622     return;
9623   }
9624 
9625 if (has_then_trap)
9626   {
9627   /* Head item on backtrack. */
9628   PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
9629   BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
9630   BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
9631   common->then_trap = save_then_trap;
9632   }
9633 SLJIT_ASSERT(cc == ccend);
9634 }
9635 
/* The helper macros of the matching-path generators end their scope here. */
9636 #undef PUSH_BACKTRACK
9637 #undef PUSH_BACKTRACK_NOVALUE
9638 #undef BACKTRACK_AS
9639 
/* Helper macros for the backtracking-path generators.

COMPILE_BACKTRACKINGPATH(current) calls compile_backtrackingpath() for the
given item and makes the *calling* function return (without a value) when
sljit_get_compiler_error() reports an error. It expects variables named
"common" and "compiler" to be in scope at the point of use. */
9640 #define COMPILE_BACKTRACKINGPATH(current) \
9641   do \
9642     { \
9643     compile_backtrackingpath(common, (current)); \
9644     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
9645       return; \
9646     } \
9647   while (0)
9648 
/* CURRENT_AS(type) casts the variable named "current" to a pointer to type. */
9649 #define CURRENT_AS(type) ((type *)current)
9650 
/* Emit the backtracking path of a single-item repeat (char_iterator_backtrack).

The repeat is decoded again with get_iterator_parameters(). The saved state is
read either from the top of the backtrack stack (private_data_ptr == 0) or
from private data at SLJIT_SP + private_data_ptr; offset0 and offset1 address
its first and second word. When the state lives on the stack, every exit path
below releases it with free_stack().

Each opcode either resumes at the item's matchingpath label with an adjusted
STR_PTR, or falls through to the end of the emitted code, where the
topbacktracks jumps of the item are bound. OP_EXACT and the possessive
opcodes emit nothing besides that final binding.

Arguments:
  common      the compiler state
  current     the backtrack item; accessed as char_iterator_backtrack
*/
compile_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)9651 static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9652 {
9653 DEFINE_COMPILER;
9654 PCRE2_SPTR cc = current->cc;
9655 PCRE2_UCHAR opcode;
9656 PCRE2_UCHAR type;
9657 sljit_u32 max = 0, exact;
9658 struct sljit_label *label = NULL;
9659 struct sljit_jump *jump = NULL;
9660 jump_list *jumplist = NULL;
9661 PCRE2_SPTR end;
9662 int private_data_ptr = PRIVATE_DATA(cc);
9663 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
9664 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
9665 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
9666 
9667 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
9668 
9669 switch(opcode)
9670   {
9671   case OP_STAR:
9672   case OP_UPTO:
9673   if (type == OP_ANYNL || type == OP_EXTUNI)
9674     {
         /* For these two types the saved positions are popped one at a time
         from the stack; a popped value of 0 ends the retries. */
9675     SLJIT_ASSERT(private_data_ptr == 0);
9676     set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
9677     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9678     free_stack(common, 1);
9679     CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9680     }
9681   else
9682     {
9683     if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
9684       {
           /* Walk back from the saved position (offset0) towards the limit in
           offset1, resuming only where the code unit just before STR_PTR,
           ORed with othercasebit when that is non-zero, equals the recorded
           character. */
9685       OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9686       OP1(SLJIT_MOV, TMP2, 0, base, offset1);
9687       OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9688 
9689       jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
9690       label = LABEL();
9691       OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
9692       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9693       if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
9694         OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
9695       CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9696       skip_char_back(common);
9697       CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
9698       }
9699     else
9700       {
           /* Give back one character per backtrack until the saved position
           is no longer above the limit stored in offset1. */
9701       OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9702       jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
9703       skip_char_back(common);
9704       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9705       JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9706       }
9707     JUMPHERE(jump);
9708     if (private_data_ptr == 0)
9709       free_stack(common, 2);
9710     }
9711   break;
9712 
       /* Try to match one more item from the saved position; a failure ends
       up in jumplist, which is bound after the jump back to matchingpath. */
9713   case OP_MINSTAR:
9714   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9715   compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
9716   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9717   JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9718   set_jumps(jumplist, LABEL());
9719   if (private_data_ptr == 0)
9720     free_stack(common, 1);
9721   break;
9722 
       /* As OP_MINSTAR, but the counter kept in offset1 is decremented first
       and reaching zero gives up as well. */
9723   case OP_MINUPTO:
9724   OP1(SLJIT_MOV, TMP1, 0, base, offset1);
9725   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9726   OP2(SLJIT_SUB | SLJIT_SET_E, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9727   add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));
9728 
9729   OP1(SLJIT_MOV, base, offset1, TMP1, 0);
9730   compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
9731   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9732   JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9733 
9734   set_jumps(jumplist, LABEL());
9735   if (private_data_ptr == 0)
9736     free_stack(common, 2);
9737   break;
9738 
       /* The saved position is reloaded and its slot cleared to 0. A non-zero
       position resumes at matchingpath; finding 0 means the retry was already
       used. The u.backtracks jumps get the same reload-and-clear sequence but
       resume unconditionally. */
9739   case OP_QUERY:
9740   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9741   OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
9742   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9743   jump = JUMP(SLJIT_JUMP);
9744   set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
9745   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9746   OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
9747   JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9748   JUMPHERE(jump);
9749   if (private_data_ptr == 0)
9750     free_stack(common, 1);
9751   break;
9752 
       /* The slot is cleared in the same way; a non-zero saved position gets
       one attempt to match the item before resuming at matchingpath. */
9753   case OP_MINQUERY:
9754   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9755   OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
9756   jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9757   compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
9758   JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9759   set_jumps(jumplist, LABEL());
9760   JUMPHERE(jump);
9761   if (private_data_ptr == 0)
9762     free_stack(common, 1);
9763   break;
9764 
       /* Nothing to undo for these opcodes. */
9765   case OP_EXACT:
9766   case OP_POSSTAR:
9767   case OP_POSQUERY:
9768   case OP_POSUPTO:
9769   break;
9770 
9771   default:
9772   SLJIT_ASSERT_STOP();
9773   break;
9774   }
9775 
9776 set_jumps(current->topbacktracks, LABEL());
9777 }
9778 
/* Emit the backtracking path of a repeated back reference.

The repeat opcode is read from just after the reference operands: one IMM2
for OP_REF and OP_REFI, two for the other reference opcodes. The low bit of
that opcode selects between the two code shapes below.

Maximizing (low bit clear): the topbacktracks jumps are bound here, one saved
position is popped into STR_PTR, and matching resumes at matchingpath unless
the popped value is 0.

Otherwise: STR_PTR is reloaded from the top stack slot without popping it and
matching resumes unless the value is 0; the topbacktracks jumps are bound
after that test, and the state (2 slots for OP_REF and OP_REFI, 3 otherwise)
is released.

Arguments:
  common      the compiler state
  current     the backtrack item; accessed as ref_iterator_backtrack
*/
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)9779 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9780 {
9781 DEFINE_COMPILER;
9782 PCRE2_SPTR cc = current->cc;
9783 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9784 PCRE2_UCHAR type;
9785 
9786 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
9787 
9788 if ((type & 0x1) == 0)
9789   {
9790   /* Maximize case. */
9791   set_jumps(current->topbacktracks, LABEL());
9792   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9793   free_stack(common, 1);
9794   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
9795   return;
9796   }
9797 
     /* Low bit set. NOTE(review): presumably the minimizing repeat variants -
     confirm against the opcode numbering. */
9798 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9799 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
9800 set_jumps(current->topbacktracks, LABEL());
9801 free_stack(common, ref ? 2 : 3);
9802 }
9803 
/* Emit the backtracking path of a recursion item (recurse_backtrack).

When the item's inlined_pattern flag is set, the backtracking path of
current->top is compiled first, the topbacktracks jumps are bound after it,
and nothing else is emitted.

Otherwise the topbacktracks jumps are bound and values saved on the stack are
restored: with both common->has_set_som and a non-zero common->mark_ptr, two
slots are popped into OVECTOR(0) and the mark_ptr local respectively; with
only one of them, a single slot is popped into the matching location; with
neither, nothing is restored.

Arguments:
  common      the compiler state
  current     the backtrack item; accessed as recurse_backtrack
*/
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)9804 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9805 {
9806 DEFINE_COMPILER;
9807 
9808 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
9809   compile_backtrackingpath(common, current->top);
9810 set_jumps(current->topbacktracks, LABEL());
9811 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
9812   return;
9813 
9814 if (common->has_set_som && common->mark_ptr != 0)
9815   {
       /* Both values are loaded before the slots are released. */
9816   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9817   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9818   free_stack(common, 2);
9819   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP2, 0);
9820   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
9821   }
9822 else if (common->has_set_som || common->mark_ptr != 0)
9823   {
9824   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9825   free_stack(common, 1);
9826   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
9827   }
9828 }
9829 
/* Emits the backtracking path of an assertion (OP_ASSERT, OP_ASSERT_NOT,
OP_ASSERTBACK, OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO. An
OP_BRAMINZERO prefix is not expected here.

With an OP_BRAZERO prefix the saved STR_PTR is reloaded from the stack; a
non-zero value sends control back to the assertion's matching path after the
slot has been overwritten with zero. For positive assertions that own a frame
(framesize >= 0) the frame is reverted and the previous private data value is
restored.

Arguments:
  common    the shared compiler state
  current   the backtrack record of the assertion
*/
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
assert_backtrack *backtrack = CURRENT_AS(assert_backtrack);
PCRE2_SPTR code = current->cc;
BOOL has_brazero = FALSE;
struct sljit_jump *skip_retry = NULL;

SLJIT_ASSERT(*code != OP_BRAMINZERO);
if (*code == OP_BRAZERO)
  {
  has_brazero = TRUE;
  code++;

  SLJIT_ASSERT(current->topbacktracks == NULL);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

if (backtrack->framesize < 0)
  {
  /* No frame to revert. */
  set_jumps(current->topbacktracks, LABEL());

  if (has_brazero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, backtrack->matchingpath);
    free_stack(common, 1);
    }
  return;
  }

if (has_brazero)
  {
  if (*code == OP_ASSERT_NOT || *code == OP_ASSERTBACK_NOT)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, backtrack->matchingpath);
    free_stack(common, 1);
    return;
    }

  free_stack(common, 1);
  skip_retry = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (*code == OP_ASSERT || *code == OP_ASSERTBACK)
  {
  /* Revert the frame of the positive assertion and restore the previous
  value of its private data slot. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), backtrack->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), backtrack->private_data_ptr, SLJIT_MEM1(STACK_TOP), backtrack->framesize * sizeof(sljit_sw));
  }

set_jumps(current->topbacktracks, LABEL());

if (has_brazero)
  {
  /* We know there is enough place on the stack. */
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
  JUMPHERE(skip_retry);
  }
}
9896 
/* Emits the backtracking path for a bracket group.

compile_backtrackingpath routes OP_ONCE, OP_ONCE_NC, OP_BRA, OP_CBRA, OP_COND,
OP_SBRA, OP_SCBRA and OP_SCOND here, as well as OP_BRAZERO when the opcode
after it is above OP_ASSERTBACK_NOT. A leading OP_BRAZERO / OP_BRAMINZERO is
recorded in 'bra' and skipped before the bracket opcode itself is read.

The code is emitted in this order: restore of a counted-repeat counter, the
prefix handling selected by the ket and 'bra', restore of saved capture data,
dispatch on the saved alternative marker, the backtracking path of the first
alternative, then the matching and backtracking paths of every remaining
alternative, release of the stack slots still held for the group, and finally
the repeat handling selected by the ket.

The function returns early, leaving the emitted code incomplete, when the
read-only jump table cannot be allocated or sljit_get_compiler_error()
reports an error after an alternative has been compiled.

Arguments:
  common    the shared compiler state
  current   the backtrack record of the bracket
*/
compile_bracket_backtrackingpath(compiler_common * common,struct backtrack_common * current)9897 static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9898 {
9899 DEFINE_COMPILER;
9900 int opcode, stacksize, alt_count, alt_max;
9901 int offset = 0;
9902 int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
9903 int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
9904 PCRE2_SPTR cc = current->cc;
9905 PCRE2_SPTR ccbegin;
9906 PCRE2_SPTR ccprev;
9907 PCRE2_UCHAR bra = OP_BRA;
9908 PCRE2_UCHAR ket;
9909 assert_backtrack *assert;
9910 sljit_uw *next_update_addr = NULL;
9911 BOOL has_alternatives;
9912 BOOL needs_control_head = FALSE;
9913 struct sljit_jump *brazero = NULL;
9914 struct sljit_jump *alt1 = NULL;
9915 struct sljit_jump *alt2 = NULL;
9916 struct sljit_jump *once = NULL;
9917 struct sljit_jump *cond = NULL;
9918 struct sljit_label *rmin_label = NULL;
9919 struct sljit_label *exact_label = NULL;
9920 
9921 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
9922   {
9923   bra = *cc;
9924   cc++;
9925   }
9926 
9927 opcode = *cc;
/* Read the opcode that closes the group. bracketend() is assumed to return
the position just past it - TODO confirm against its definition. */
9928 ccbegin = bracketend(cc) - 1 - LINK_SIZE;
9929 ket = *ccbegin;
/* A plain OP_KET with non-zero private data describes a counted repeat: the
counter slot, the repeat type and the repeat count are read from the private
data. OP_UPTO and OP_MINUPTO are handled through the KETRMAX / KETRMIN code. */
9930 if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
9931   {
9932   repeat_ptr = PRIVATE_DATA(ccbegin);
9933   repeat_type = PRIVATE_DATA(ccbegin + 2);
9934   repeat_count = PRIVATE_DATA(ccbegin + 3);
9935   SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
9936   if (repeat_type == OP_UPTO)
9937     ket = OP_KETRMAX;
9938   if (repeat_type == OP_MINUPTO)
9939     ket = OP_KETRMIN;
9940   }
/* From here on ccbegin is the bracket opcode and cc is advanced by the link
stored after it; an OP_ALT at that position means further alternatives. */
9941 ccbegin = cc;
9942 cc += GET(cc, 1);
9943 has_alternatives = *cc == OP_ALT;
9944 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
9945   has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
9946 if (opcode == OP_CBRA || opcode == OP_SCBRA)
9947   offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
9948 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
9949   opcode = OP_SCOND;
9950 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
9951   opcode = OP_ONCE;
9952 
9953 alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
9954 
9955 /* Decoding the needs_control_head in framesize. */
9956 if (opcode == OP_ONCE)
9957   {
9958   needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
9959   CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
9960   }
9961 
/* Counted repeat handled as KETRMAX / KETRMIN: pop the saved counter and write
it back to repeat_ptr (incremented by one for OP_UPTO). */
9962 if (ket != OP_KET && repeat_type != 0)
9963   {
9964   /* TMP1 is used in OP_KETRMIN below. */
9965   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9966   free_stack(common, 1);
9967   if (repeat_type == OP_UPTO)
9968     OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
9969   else
9970     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
9971   }
9972 
9973 if (ket == OP_KETRMAX)
9974   {
9975   if (bra == OP_BRAZERO)
9976     {
9977     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9978     free_stack(common, 1);
9979     brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
9980     }
9981   }
9982 else if (ket == OP_KETRMIN)
9983   {
9984   if (bra != OP_BRAMINZERO)
9985     {
9986     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9987     if (repeat_type != 0)
9988       {
9989       /* TMP1 was set a few lines above. */
9990       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
9991       /* Drop STR_PTR for non-greedy plus quantifier. */
9992       if (opcode != OP_ONCE)
9993         free_stack(common, 1);
9994       }
9995     else if (opcode >= OP_SBRA || opcode == OP_ONCE)
9996       {
9997       /* Checking zero-length iteration. */
9998       if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
9999         CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
10000       else
10001         {
10002         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10003         CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
10004         }
10005       /* Drop STR_PTR for non-greedy plus quantifier. */
10006       if (opcode != OP_ONCE)
10007         free_stack(common, 1);
10008       }
10009     else
10010       JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
10011     }
10012   rmin_label = LABEL();
10013   if (repeat_type != 0)
10014     OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10015   }
10016 else if (bra == OP_BRAZERO)
10017   {
10018   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10019   free_stack(common, 1);
10020   brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
10021   }
10022 else if (repeat_type == OP_EXACT)
10023   {
10024   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10025   exact_label = LABEL();
10026   }
10027 
/* Capturing bracket: pop the saved values back into the ovector pair, and into
capture_last_ptr when that slot is in use. */
10028 if (offset != 0)
10029   {
10030   if (common->capture_last_ptr != 0)
10031     {
10032     SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
10033     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10034     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10035     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
10036     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
10037     free_stack(common, 3);
10038     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
10039     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
10040     }
10041   else if (common->optimized_cbracket[offset >> 1] == 0)
10042     {
10043     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10044     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10045     free_stack(common, 2);
10046     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10047     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
10048     }
10049   }
10050 
10051 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
10052   {
10053   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
10054     {
10055     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10056     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10057     }
10058   once = JUMP(SLJIT_JUMP);
10059   }
10060 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
10061   {
10062   if (has_alternatives)
10063     {
10064     /* Always exactly one alternative. */
10065     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10066     free_stack(common, 1);
10067 
10068     alt_max = 2;
10069     alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
10070     }
10071   }
10072 else if (has_alternatives)
10073   {
/* Pop the saved alternative marker and branch on it. The markers compared
here are multiples of sizeof(sljit_uw), matching the alt_count values stored
by the alternative loop further down. */
10074   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10075   free_stack(common, 1);
10076 
10077   if (alt_max > 4)
10078     {
10079     /* Table jump if alt_max is greater than 4. */
10080     next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
10081     if (SLJIT_UNLIKELY(next_update_addr == NULL))
10082       return;
10083     sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
10084     add_label_addr(common, next_update_addr++);
10085     }
10086   else
10087     {
10088     if (alt_max == 4)
10089       alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
10090     alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
10091     }
10092   }
10093 
/* Backtracking path of the first alternative. */
10094 COMPILE_BACKTRACKINGPATH(current->top);
10095 if (current->topbacktracks)
10096   set_jumps(current->topbacktracks, LABEL());
10097 
10098 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
10099   {
10100   /* Conditional block always has at most one alternative. */
10101   if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
10102     {
10103     SLJIT_ASSERT(has_alternatives);
10104     assert = CURRENT_AS(bracket_backtrack)->u.assert;
10105     if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
10106       {
10107       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
10108       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10109       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
10110       }
10111     cond = JUMP(SLJIT_JUMP);
10112     set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
10113     }
10114   else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
10115     {
10116     SLJIT_ASSERT(has_alternatives);
10117     cond = JUMP(SLJIT_JUMP);
10118     set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
10119     }
10120   else
10121     SLJIT_ASSERT(!has_alternatives);
10122   }
10123 
/* For every remaining alternative: emit its matching path, the code that
saves the group state again and jumps to alternative_matchingpath, and then
the alternative's own backtracking path. */
10124 if (has_alternatives)
10125   {
10126   alt_count = sizeof(sljit_uw);
10127   do
10128     {
10129     current->top = NULL;
10130     current->topbacktracks = NULL;
10131     current->nextbacktracks = NULL;
10132     /* Conditional blocks always have an additional alternative, even if it is empty. */
10133     if (*cc == OP_ALT)
10134       {
10135       ccprev = cc + 1 + LINK_SIZE;
10136       cc += GET(cc, 1);
10137       if (opcode != OP_COND && opcode != OP_SCOND)
10138         {
10139         if (opcode != OP_ONCE)
10140           {
10141           if (private_data_ptr != 0)
10142             OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10143           else
10144             OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10145           }
10146         else
10147           OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
10148         }
10149       compile_matchingpath(common, ccprev, cc, current);
10150       if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10151         return;
10152       }
10153 
10154     /* Instructions after the current alternative is successfully matched. */
10155     /* There is a similar code in compile_bracket_matchingpath. */
10156     if (opcode == OP_ONCE)
10157       match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
10158 
/* First pass: count the stack slots needed, so that they can be allocated
in one step. */
10159     stacksize = 0;
10160     if (repeat_type == OP_MINUPTO)
10161       {
10162       /* We need to preserve the counter. TMP2 will be used below. */
10163       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10164       stacksize++;
10165       }
10166     if (ket != OP_KET || bra != OP_BRA)
10167       stacksize++;
10168     if (offset != 0)
10169       {
10170       if (common->capture_last_ptr != 0)
10171         stacksize++;
10172       if (common->optimized_cbracket[offset >> 1] == 0)
10173         stacksize += 2;
10174       }
10175     if (opcode != OP_ONCE)
10176       stacksize++;
10177 
10178     if (stacksize > 0)
10179       allocate_stack(common, stacksize);
10180 
/* Second pass: fill the allocated slots; stacksize now counts the slots
written so far. */
10181     stacksize = 0;
10182     if (repeat_type == OP_MINUPTO)
10183       {
10184       /* TMP2 was set above. */
10185       OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
10186       stacksize++;
10187       }
10188 
10189     if (ket != OP_KET || bra != OP_BRA)
10190       {
10191       if (ket != OP_KET)
10192         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10193       else
10194         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10195       stacksize++;
10196       }
10197 
10198     if (offset != 0)
10199       stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
10200 
/* Save the marker of this alternative; it is the value compared by the
dispatch code emitted before the first alternative's backtracking path. */
10201     if (opcode != OP_ONCE)
10202       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
10203 
10204     if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
10205       {
10206       /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
10207       SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
10208       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10209       }
10210 
10211     JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
10212 
/* Bind the dispatch target for this alternative's backtracking path: either
the next jump table entry or the pending alt1 / alt2 compare jump. */
10213     if (opcode != OP_ONCE)
10214       {
10215       if (alt_max > 4)
10216         add_label_addr(common, next_update_addr++);
10217       else
10218         {
10219         if (alt_count != 2 * sizeof(sljit_uw))
10220           {
10221           JUMPHERE(alt1);
10222           if (alt_max == 3 && alt_count == sizeof(sljit_uw))
10223             alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
10224           }
10225         else
10226           {
10227           JUMPHERE(alt2);
10228           if (alt_max == 4)
10229             alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
10230           }
10231         }
10232       alt_count += sizeof(sljit_uw);
10233       }
10234 
10235     COMPILE_BACKTRACKINGPATH(current->top);
10236     if (current->topbacktracks)
10237       set_jumps(current->topbacktracks, LABEL());
10238     SLJIT_ASSERT(!current->nextbacktracks);
10239     }
10240   while (*cc == OP_ALT);
10241 
10242   if (cond != NULL)
10243     {
10244     SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
10245     assert = CURRENT_AS(bracket_backtrack)->u.assert;
10246     if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
10247       {
10248       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
10249       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10250       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
10251       }
10252     JUMPHERE(cond);
10253     }
10254 
10255   /* Free the STR_PTR. */
10256   if (private_data_ptr == 0)
10257     free_stack(common, 1);
10258   }
10259 
/* Pop the data still held on the stack for this group: capture values, the
saved private data value, or the OP_ONCE frame. */
10260 if (offset != 0)
10261   {
10262   /* Using both tmp register is better for instruction scheduling. */
10263   if (common->optimized_cbracket[offset >> 1] != 0)
10264     {
10265     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10266     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10267     free_stack(common, 2);
10268     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10269     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
10270     }
10271   else
10272     {
10273     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10274     free_stack(common, 1);
10275     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10276     }
10277   }
10278 else if (opcode == OP_SBRA || opcode == OP_SCOND)
10279   {
10280   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
10281   free_stack(common, 1);
10282   }
10283 else if (opcode == OP_ONCE)
10284   {
10285   cc = ccbegin + GET(ccbegin, 1);
10286   stacksize = needs_control_head ? 1 : 0;
10287 
10288   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
10289     {
10290     /* Reset head and drop saved frame. */
10291     stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
10292     }
10293   else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
10294     {
10295     /* The STR_PTR must be released. */
10296     stacksize++;
10297     }
10298 
10299   if (stacksize > 0)
10300     free_stack(common, stacksize);
10301 
10302   JUMPHERE(once);
10303   /* Restore previous private_data_ptr */
10304   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
10305     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_sw));
10306   else if (ket == OP_KETRMIN)
10307     {
10308     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10309     /* See the comment below. */
10310     free_stack(common, 2);
10311     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10312     }
10313   }
10314 
/* Repeat handling selected by the repeat type, the ket and 'bra'. */
10315 if (repeat_type == OP_EXACT)
10316   {
10317   OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10318   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
10319   CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
10320   }
10321 else if (ket == OP_KETRMAX)
10322   {
10323   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10324   if (bra != OP_BRAZERO)
10325     free_stack(common, 1);
10326 
10327   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
10328   if (bra == OP_BRAZERO)
10329     {
10330     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10331     JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
10332     JUMPHERE(brazero);
10333     free_stack(common, 1);
10334     }
10335   }
10336 else if (ket == OP_KETRMIN)
10337   {
10338   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10339 
10340   /* OP_ONCE removes everything in case of a backtrack, so we don't
10341   need to explicitly release the STR_PTR. The extra release would
10342   affect badly the free_stack(2) above. */
10343   if (opcode != OP_ONCE)
10344     free_stack(common, 1);
10345   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
10346   if (opcode == OP_ONCE)
10347     free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
10348   else if (bra == OP_BRAMINZERO)
10349     free_stack(common, 1);
10350   }
10351 else if (bra == OP_BRAZERO)
10352   {
10353   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10354   JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
10355   JUMPHERE(brazero);
10356   }
10357 }
10358 
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)10359 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10360 {
10361 DEFINE_COMPILER;
10362 int offset;
10363 struct sljit_jump *jump;
10364 
10365 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
10366   {
10367   if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
10368     {
10369     offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
10370     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10371     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10372     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10373     if (common->capture_last_ptr != 0)
10374       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
10375     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
10376     if (common->capture_last_ptr != 0)
10377       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
10378     }
10379   set_jumps(current->topbacktracks, LABEL());
10380   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
10381   return;
10382   }
10383 
10384 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
10385 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10386 
10387 if (current->topbacktracks)
10388   {
10389   jump = JUMP(SLJIT_JUMP);
10390   set_jumps(current->topbacktracks, LABEL());
10391   /* Drop the stack frame. */
10392   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
10393   JUMPHERE(jump);
10394   }
10395 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_sw));
10396 }
10397 
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)10398 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10399 {
10400 assert_backtrack backtrack;
10401 
10402 current->top = NULL;
10403 current->topbacktracks = NULL;
10404 current->nextbacktracks = NULL;
10405 if (current->cc[1] > OP_ASSERTBACK_NOT)
10406   {
10407   /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
10408   compile_bracket_matchingpath(common, current->cc, current);
10409   compile_bracket_backtrackingpath(common, current->top);
10410   }
10411 else
10412   {
10413   memset(&backtrack, 0, sizeof(backtrack));
10414   backtrack.common.cc = current->cc;
10415   backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
10416   /* Manual call of compile_assert_matchingpath. */
10417   compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
10418   }
10419 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
10420 }
10421 
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)10422 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10423 {
10424 DEFINE_COMPILER;
10425 PCRE2_UCHAR opcode = *current->cc;
10426 struct sljit_label *loop;
10427 struct sljit_jump *jump;
10428 
10429 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
10430   {
10431   if (common->then_trap != NULL)
10432     {
10433     SLJIT_ASSERT(common->control_head_ptr != 0);
10434 
10435     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10436     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
10437     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
10438     jump = JUMP(SLJIT_JUMP);
10439 
10440     loop = LABEL();
10441     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), -(int)sizeof(sljit_sw));
10442     JUMPHERE(jump);
10443     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(2 * sizeof(sljit_sw)), TMP1, 0, loop);
10444     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(3 * sizeof(sljit_sw)), TMP2, 0, loop);
10445     add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
10446     return;
10447     }
10448   else if (common->positive_assert)
10449     {
10450     add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
10451     return;
10452     }
10453   }
10454 
10455 if (common->local_exit)
10456   {
10457   if (common->quit_label == NULL)
10458     add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
10459   else
10460     JUMPTO(SLJIT_JUMP, common->quit_label);
10461   return;
10462   }
10463 
10464 if (opcode == OP_SKIP_ARG)
10465   {
10466   SLJIT_ASSERT(common->control_head_ptr != 0);
10467   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10468   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
10469   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
10470   sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
10471   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
10472 
10473   OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
10474   add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, -1));
10475   return;
10476   }
10477 
10478 if (opcode == OP_SKIP)
10479   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10480 else
10481   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
10482 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
10483 }
10484 
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)10485 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10486 {
10487 DEFINE_COMPILER;
10488 struct sljit_jump *jump;
10489 int size;
10490 
10491 if (CURRENT_AS(then_trap_backtrack)->then_trap)
10492   {
10493   common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
10494   return;
10495   }
10496 
10497 size = CURRENT_AS(then_trap_backtrack)->framesize;
10498 size = 3 + (size < 0 ? 0 : size);
10499 
10500 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
10501 free_stack(common, size);
10502 jump = JUMP(SLJIT_JUMP);
10503 
10504 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
10505 /* STACK_TOP is set by THEN. */
10506 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
10507   add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10508 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10509 free_stack(common, 3);
10510 
10511 JUMPHERE(jump);
10512 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10513 }
10514 
/* Emits the backtracking paths for a chain of backtrack records.

Starting at 'current' and following the 'prev' links, each record first gets
its pending nextbacktracks jumps bound to the current position. The opcode
the record refers to then selects the emitter for its backtracking code.
common->then_trap is saved on entry and put back when the whole chain has
been processed.

Arguments:
  common    the shared compiler state
  current   the first backtrack record to process (may be NULL)
*/
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *outer_then_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->nextbacktracks != NULL)
    set_jumps(current->nextbacktracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Pop the saved value back into OVECTOR(0). */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Single item iterators. */
    case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
    case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
    case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
    case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
    case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
    case OP_CLASS: case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF: case OP_REFI: case OP_DNREF: case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT: case OP_ASSERT_NOT: case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ONCE: case OP_ONCE_NC:
    case OP_BRA: case OP_CBRA: case OP_COND:
    case OP_SBRA: case OP_SCBRA: case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode after OP_BRAZERO decides between assertion and bracket. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS: case OP_CBRAPOS: case OP_SBRAPOS: case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    if (common->has_skip_arg)
      {
      /* Five slots are released: slot 4 is written back to the mark, slot 0
      to the control head. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      /* Only the previous mark was saved. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN: case OP_THEN_ARG:
    case OP_PRUNE: case OP_PRUNE_ARG:
    case OP_SKIP: case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT:
    /* Outside a local exit the return value is set to "no match" before
    leaving through the quit path. */
    if (!common->local_exit)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
    if (common->quit_label != NULL)
      JUMPTO(SLJIT_JUMP, common->quit_label);
    else
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    break;

    case OP_CALLOUT: case OP_CALLOUT_STR:
    case OP_FAIL:
    case OP_ACCEPT: case OP_ASSERT_ACCEPT:
    set_jumps(current->topbacktracks, LABEL());
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_ASSERT_STOP();
    break;
    }
  }

common->then_trap = outer_then_trap;
}
10702 
compile_recurse(compiler_common * common)10703 static SLJIT_INLINE void compile_recurse(compiler_common *common)
10704 {
10705 DEFINE_COMPILER;
10706 PCRE2_SPTR cc = common->start + common->currententry->start;
10707 PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
10708 PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
10709 BOOL needs_control_head;
10710 int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
10711 int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
10712 int alternativesize;
10713 BOOL needs_frame;
10714 backtrack_common altbacktrack;
10715 struct sljit_jump *jump;
10716 
10717 /* Recurse captures then. */
10718 common->then_trap = NULL;
10719 
10720 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
10721 needs_frame = framesize >= 0;
10722 if (!needs_frame)
10723   framesize = 0;
10724 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
10725 
10726 SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
10727 common->currententry->entry = LABEL();
10728 set_jumps(common->currententry->calls, common->currententry->entry);
10729 
10730 sljit_emit_fast_enter(compiler, TMP2, 0);
10731 count_match(common);
10732 allocate_stack(common, private_data_size + framesize + alternativesize);
10733 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
10734 copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
10735 if (needs_control_head)
10736   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
10737 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
10738 if (needs_frame)
10739   init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);
10740 
10741 if (alternativesize > 0)
10742   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10743 
10744 memset(&altbacktrack, 0, sizeof(backtrack_common));
10745 common->quit_label = NULL;
10746 common->accept_label = NULL;
10747 common->quit = NULL;
10748 common->accept = NULL;
10749 altbacktrack.cc = ccbegin;
10750 cc += GET(cc, 1);
10751 while (1)
10752   {
10753   altbacktrack.top = NULL;
10754   altbacktrack.topbacktracks = NULL;
10755 
10756   if (altbacktrack.cc != ccbegin)
10757     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10758 
10759   compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
10760   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10761     return;
10762 
10763   add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
10764 
10765   compile_backtrackingpath(common, altbacktrack.top);
10766   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10767     return;
10768   set_jumps(altbacktrack.topbacktracks, LABEL());
10769 
10770   if (*cc != OP_ALT)
10771     break;
10772 
10773   altbacktrack.cc = cc + 1 + LINK_SIZE;
10774   cc += GET(cc, 1);
10775   }
10776 
10777 /* None of them matched. */
10778 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
10779 jump = JUMP(SLJIT_JUMP);
10780 
10781 if (common->quit != NULL)
10782   {
10783   set_jumps(common->quit, LABEL());
10784   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
10785   if (needs_frame)
10786     {
10787     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10788     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10789     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10790     }
10791   OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
10792   common->quit = NULL;
10793   add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
10794   }
10795 
10796 set_jumps(common->accept, LABEL());
10797 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
10798 if (needs_frame)
10799   {
10800   OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10801   add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10802   OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10803   }
10804 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
10805 
10806 JUMPHERE(jump);
10807 if (common->quit != NULL)
10808   set_jumps(common->quit, LABEL());
10809 copy_private_data(common, ccbegin, ccend, FALSE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
10810 free_stack(common, private_data_size + framesize + alternativesize);
10811 if (needs_control_head)
10812   {
10813   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 2 * sizeof(sljit_sw));
10814   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
10815   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP1, 0);
10816   OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
10817   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
10818   }
10819 else
10820   {
10821   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
10822   OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
10823   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP2, 0);
10824   }
10825 sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
10826 }
10827 
/* Remove the COMPILE_BACKTRACKINGPATH and CURRENT_AS helper macros so that
they are not visible to the code that follows. */
10828 #undef COMPILE_BACKTRACKINGPATH
10829 #undef CURRENT_AS
10830 
/* Generate the machine code for one matching mode of a compiled pattern and
record it in the pattern's executable_functions descriptor.

Arguments:
  code          the compiled pattern (accessed as a pcre2_real_code)
  mode          PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_SOFT or
                PCRE2_JIT_PARTIAL_HARD

Returns:        0 on success
                PCRE2_ERROR_INTERNAL if the newline convention is not recognized
                PCRE2_ERROR_NOMEMORY on every other failure path

Every temporary allocation made here goes through allocator_data (the
pattern's memctl). The sljit compiler object must never be dereferenced after
sljit_free_compiler() has been called on it, so the cleanup paths pass the
local allocator_data to jit_free_rodata rather than compiler->allocator_data.
*/
static int jit_compile(pcre2_code *code, sljit_u32 mode)
{
pcre2_real_code *re = (pcre2_real_code *)code;
struct sljit_compiler *compiler;
backtrack_common rootbacktrack;
compiler_common common_data;
compiler_common *common = &common_data;
const sljit_u8 *tables = re->tables;
void *allocator_data = &re->memctl;
int private_data_size;
PCRE2_SPTR ccend;
executable_functions *functions;
void *executable_func;
sljit_uw executable_size;
sljit_uw total_length;
label_addr_list *label_addr;
struct sljit_label *mainloop_label = NULL;
struct sljit_label *continue_match_label;
struct sljit_label *empty_match_found_label = NULL;
struct sljit_label *empty_match_backtrack_label = NULL;
struct sljit_label *reset_match_label;
struct sljit_label *quit_label;
struct sljit_jump *jump;
struct sljit_jump *minlength_check_failed = NULL;
struct sljit_jump *reqbyte_notfound = NULL;
struct sljit_jump *empty_match = NULL;

SLJIT_ASSERT(tables);

memset(&rootbacktrack, 0, sizeof(backtrack_common));
memset(common, 0, sizeof(compiler_common));
/* The name table follows the pcre2_real_code header; the byte code starts
right after the name table. */
common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;

common->start = rootbacktrack.cc;
common->read_only_data_head = NULL;
common->fcc = tables + fcc_offset;
common->lcc = (sljit_sw)(tables + lcc_offset);
common->mode = mode;
common->might_be_empty = re->minlength == 0;
common->nltype = NLTYPE_FIXED;
switch(re->newline_convention)
  {
  case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
  case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
  case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
  case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
  case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
  /* Nothing has been allocated yet, so a plain return is safe here. */
  default: return PCRE2_ERROR_INTERNAL;
  }
common->nlmax = READ_CHAR_MAX;
common->nlmin = 0;
if (re->bsr_convention == PCRE2_BSR_UNICODE)
  common->bsr_nltype = NLTYPE_ANY;
else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
  common->bsr_nltype = NLTYPE_ANYCRLF;
else
  {
#ifdef BSR_ANYCRLF
  common->bsr_nltype = NLTYPE_ANYCRLF;
#else
  common->bsr_nltype = NLTYPE_ANY;
#endif
  }
common->bsr_nlmax = READ_CHAR_MAX;
common->bsr_nlmin = 0;
common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
common->ctypes = (sljit_sw)(tables + ctypes_offset);
common->name_count = re->name_count;
common->name_entry_size = re->name_entry_size;
common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
#ifdef SUPPORT_UNICODE
/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
common->utf = (re->overall_options & PCRE2_UTF) != 0;
common->use_ucp = (re->overall_options & PCRE2_UCP) != 0;
if (common->utf)
  {
  /* In UTF mode the newline character range is narrowed to the range that
  the selected newline type can actually contain. */
  if (common->nltype == NLTYPE_ANY)
    common->nlmax = 0x2029;
  else if (common->nltype == NLTYPE_ANYCRLF)
    common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
  else
    {
    /* We only care about the first newline character. */
    common->nlmax = common->newline & 0xff;
    }

  if (common->nltype == NLTYPE_FIXED)
    common->nlmin = common->newline & 0xff;
  else
    common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;

  if (common->bsr_nltype == NLTYPE_ANY)
    common->bsr_nlmax = 0x2029;
  else
    common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
  common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
  }
#endif /* SUPPORT_UNICODE */
ccend = bracketend(common->start);

/* Calculate the local space size on the stack. */
common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
if (!common->optimized_cbracket)
  return PCRE2_ERROR_NOMEMORY;
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
memset(common->optimized_cbracket, 0, re->top_bracket + 1);
#else
memset(common->optimized_cbracket, 1, re->top_bracket + 1);
#endif

SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
common->capture_last_ptr = common->ovector_start;
common->ovector_start += sizeof(sljit_sw);
#endif
if (!check_opcode_types(common, common->start, ccend))
  {
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }

/* Checking flags and updating ovector_start. */
if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
  {
  common->req_char_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }
if (mode != PCRE2_JIT_COMPLETE)
  {
  common->start_used_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  if (mode == PCRE2_JIT_PARTIAL_SOFT)
    {
    common->hit_start = common->ovector_start;
    common->ovector_start += sizeof(sljit_sw);
    }
  }
if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
  {
  common->match_end_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
common->control_head_ptr = 1;
#endif
/* Any non-zero value at this point only marks that a control head slot is
required; it is replaced by the real offset here. */
if (common->control_head_ptr != 0)
  {
  common->control_head_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }
if (common->has_set_som)
  {
  /* Saving the real start pointer is necessary. */
  common->start_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }

/* Aligning ovector to even number of sljit words. */
if ((common->ovector_start & sizeof(sljit_sw)) != 0)
  common->ovector_start += sizeof(sljit_sw);

if (common->start_ptr == 0)
  common->start_ptr = OVECTOR(0);

/* Capturing brackets cannot be optimized if callouts are allowed. */
if (common->capture_last_ptr != 0)
  memset(common->optimized_cbracket, 0, re->top_bracket + 1);

SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);

/* One sljit_s32 per code unit of the pattern, plus one extra byte per code
unit for then_offsets when the pattern has (*THEN). */
total_length = ccend - common->start;
common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
if (!common->private_data_ptrs)
  {
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }
memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));

private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
set_private_data_ptrs(common, &private_data_size, ccend);
if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
  {
  if (!detect_fast_forward_skip(common, &private_data_size) && !common->has_skip_in_assert_back)
    detect_fast_fail(common, common->start, &private_data_size, 4);
  }

SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr);

if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
  {
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }

if (common->has_then)
  {
  /* then_offsets lives in the tail of the private_data_ptrs allocation. */
  common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
  memset(common->then_offsets, 0, total_length);
  set_then_offsets(common, common->start, NULL);
  }

compiler = sljit_create_compiler(allocator_data);
if (!compiler)
  {
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }
common->compiler = compiler;

/* Main pcre_jit_exec entry. */
sljit_emit_enter(compiler, 0, 1, 5, 5, 0, 0, private_data_size);

/* Register init. */
reset_ovector(common, (re->top_bracket + 1) * 2);
if (common->req_char_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);

OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
/* The stored limit is limit_match + 1. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);

if (common->fast_fail_start_ptr < common->fast_fail_end_ptr)
  reset_fast_fail(common);

if (mode == PCRE2_JIT_PARTIAL_SOFT)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
if (common->control_head_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);

/* Main part of the matching */
if ((re->overall_options & PCRE2_ANCHORED) == 0)
  {
  mainloop_label = mainloop_entry(common, (re->flags & PCRE2_HASCRORLF) != 0, re->overall_options);
  continue_match_label = LABEL();
  /* Forward search if possible. */
  if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
    {
    if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
      ;
    else if ((re->flags & PCRE2_FIRSTSET) != 0)
      fast_forward_first_char(common, (PCRE2_UCHAR)(re->first_codeunit), (re->flags & PCRE2_FIRSTCASELESS) != 0);
    else if ((re->flags & PCRE2_STARTLINE) != 0)
      fast_forward_newline(common);
    else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
      fast_forward_start_bits(common, re->start_bitmap);
    }
  }
else
  continue_match_label = LABEL();

if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
  {
  /* Fail immediately when fewer than minlength code units remain. */
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
  minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
  }
if (common->req_char_ptr != 0)
  reqbyte_notfound = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);

/* Store the current STR_PTR in OVECTOR(0). */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
/* Copy the limit of allowed recursions. */
OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
if (common->capture_last_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
if (common->fast_forward_bc_ptr != NULL)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1), STR_PTR, 0);

if (common->start_ptr != OVECTOR(0))
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);

/* Copy the beginning of the string. */
if (mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  JUMPHERE(jump);
  }
else if (mode == PCRE2_JIT_PARTIAL_HARD)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

compile_matchingpath(common, common->start, ccend, &rootbacktrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  {
  sljit_free_compiler(compiler);
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
  /* compiler has just been freed: use the local allocator_data. */
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }

if (common->might_be_empty)
  {
  empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
  empty_match_found_label = LABEL();
  }

common->accept_label = LABEL();
if (common->accept != NULL)
  set_jumps(common->accept, common->accept_label);

/* This means we have a match. Update the ovector. */
copy_ovector(common, re->top_bracket + 1);
common->quit_label = common->forced_quit_label = LABEL();
if (common->quit != NULL)
  set_jumps(common->quit, common->quit_label);
if (common->forced_quit != NULL)
  set_jumps(common->forced_quit, common->forced_quit_label);
if (minlength_check_failed != NULL)
  SET_LABEL(minlength_check_failed, common->forced_quit_label);
sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);

if (mode != PCRE2_JIT_COMPLETE)
  {
  common->partialmatchlabel = LABEL();
  set_jumps(common->partialmatch, common->partialmatchlabel);
  return_with_partial_match(common, common->quit_label);
  }

if (common->might_be_empty)
  empty_match_backtrack_label = LABEL();
compile_backtrackingpath(common, rootbacktrack.top);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  {
  sljit_free_compiler(compiler);
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
  /* compiler has just been freed: use the local allocator_data. */
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }

SLJIT_ASSERT(rootbacktrack.prev == NULL);
reset_match_label = LABEL();

if (mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  /* Update hit_start only in the first time. */
  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
  JUMPHERE(jump);
  }

/* Check we have remaining characters. */
if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  }

OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
    (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1)) : common->start_ptr);

if ((re->overall_options & PCRE2_ANCHORED) == 0)
  {
  if (common->ff_newline_shortcut != NULL)
    {
    /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
    if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
      {
      if (common->match_end_ptr != 0)
        {
        /* STR_END is temporarily replaced by the match end limit held in
        TMP1; it is restored from TMP3 when the jump is not taken. */
        OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
        OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
        CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
        OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
        }
      else
        CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
      }
    }
  else
    CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
  }

/* No more remaining characters. */
if (reqbyte_notfound != NULL)
  JUMPHERE(reqbyte_notfound);

if (mode == PCRE2_JIT_PARTIAL_SOFT)
  CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);

OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
JUMPTO(SLJIT_JUMP, common->quit_label);

flush_stubs(common);

if (common->might_be_empty)
  {
  /* An empty match was found: PCRE2_NOTEMPTY always rejects it, and
  PCRE2_NOTEMPTY_ATSTART rejects it only at the start of the subject. */
  JUMPHERE(empty_match);
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
  JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
  JUMPTO(SLJIT_ZERO, empty_match_found_label);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
  JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
  }

/* Compile the recursive entries. compile_recurse() overwrites
common->quit_label, so the main one is saved here and restored afterwards. */
common->fast_forward_bc_ptr = NULL;
common->fast_fail_start_ptr = 0;
common->fast_fail_end_ptr = 0;
common->currententry = common->entries;
common->local_exit = TRUE;
quit_label = common->quit_label;
while (common->currententry != NULL)
  {
  /* Might add new entries. */
  compile_recurse(common);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    sljit_free_compiler(compiler);
    SLJIT_FREE(common->optimized_cbracket, allocator_data);
    SLJIT_FREE(common->private_data_ptrs, allocator_data);
    /* compiler has just been freed: use the local allocator_data. */
    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
    return PCRE2_ERROR_NOMEMORY;
    }
  flush_stubs(common);
  common->currententry = common->currententry->next;
  }
common->local_exit = FALSE;
common->quit_label = quit_label;

/* Allocating stack, returns with PCRE2_ERROR_JIT_STACKLIMIT if fails. */
/* This is a (really) rare case. */
set_jumps(common->stackalloc, LABEL());
/* RETURN_ADDR is not a saved register. */
sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);

sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
jump = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top));
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);

/* Allocation failed. */
JUMPHERE(jump);
/* We break the return address cache here, but this is a really rare case. */
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
JUMPTO(SLJIT_JUMP, common->quit_label);

/* Call limit reached. */
set_jumps(common->calllimit, LABEL());
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
JUMPTO(SLJIT_JUMP, common->quit_label);

/* Shared helper routines are emitted only if some generated code jumps to
them. */
if (common->revertframes != NULL)
  {
  set_jumps(common->revertframes, LABEL());
  do_revertframes(common);
  }
if (common->wordboundary != NULL)
  {
  set_jumps(common->wordboundary, LABEL());
  check_wordboundary(common);
  }
if (common->anynewline != NULL)
  {
  set_jumps(common->anynewline, LABEL());
  check_anynewline(common);
  }
if (common->hspace != NULL)
  {
  set_jumps(common->hspace, LABEL());
  check_hspace(common);
  }
if (common->vspace != NULL)
  {
  set_jumps(common->vspace, LABEL());
  check_vspace(common);
  }
if (common->casefulcmp != NULL)
  {
  set_jumps(common->casefulcmp, LABEL());
  do_casefulcmp(common);
  }
if (common->caselesscmp != NULL)
  {
  set_jumps(common->caselesscmp, LABEL());
  do_caselesscmp(common);
  }
if (common->reset_match != NULL)
  {
  set_jumps(common->reset_match, LABEL());
  do_reset_match(common, (re->top_bracket + 1) * 2);
  CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
  OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
  JUMPTO(SLJIT_JUMP, reset_match_label);
  }
#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utfreadchar != NULL)
  {
  set_jumps(common->utfreadchar, LABEL());
  do_utfreadchar(common);
  }
if (common->utfreadchar16 != NULL)
  {
  set_jumps(common->utfreadchar16, LABEL());
  do_utfreadchar16(common);
  }
if (common->utfreadtype8 != NULL)
  {
  set_jumps(common->utfreadtype8, LABEL());
  do_utfreadtype8(common);
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
if (common->getucd != NULL)
  {
  set_jumps(common->getucd, LABEL());
  do_getucd(common);
  }
#endif /* SUPPORT_UNICODE */

SLJIT_FREE(common->optimized_cbracket, allocator_data);
SLJIT_FREE(common->private_data_ptrs, allocator_data);

executable_func = sljit_generate_code(compiler);
executable_size = sljit_get_generated_code_size(compiler);
/* Store the final label addresses into the locations registered in
common->label_addrs while the labels are still alive. */
label_addr = common->label_addrs;
while (label_addr != NULL)
  {
  *label_addr->update_addr = sljit_get_label_addr(label_addr->label);
  label_addr = label_addr->next;
  }
sljit_free_compiler(compiler);
if (executable_func == NULL)
  {
  /* compiler has just been freed: use the local allocator_data. */
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }

/* Reuse the function descriptor if possible. */
if (re->executable_jit != NULL)
  functions = (executable_functions *)re->executable_jit;
else
  {
  functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
  if (functions == NULL)
    {
    /* This case is highly unlikely since we just recently
    freed a lot of memory. Not impossible though. */
    sljit_free_code(executable_func);
    /* compiler was freed above: use the local allocator_data. */
    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
    return PCRE2_ERROR_NOMEMORY;
    }
  memset(functions, 0, sizeof(executable_functions));
  functions->top_bracket = re->top_bracket + 1;
  functions->limit_match = re->limit_match;
  re->executable_jit = functions;
  }

/* Turn mode into an index. */
if (mode == PCRE2_JIT_COMPLETE)
  mode = 0;
else
  mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;

SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
functions->executable_funcs[mode] = executable_func;
functions->read_only_data_heads[mode] = common->read_only_data_head;
functions->executable_sizes[mode] = executable_size;
return 0;
}
11421 
11422 #endif
11423 
11424 /*************************************************
11425 *        JIT compile a Regular Expression        *
11426 *************************************************/
11427 
11428 /* This function uses JIT to convert a previously-compiled pattern into machine
11429 code.
11430 
11431 Arguments:
11432   code          a compiled pattern
11433   options       JIT option bits
11434 
11435 Returns:        0: success or (*NOJIT) was used
11436                <0: an error code
11437 */
11438 
11439 #define PUBLIC_JIT_COMPILE_OPTIONS \
11440   (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD)
11441 
11442 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_compile(pcre2_code * code,uint32_t options)11443 pcre2_jit_compile(pcre2_code *code, uint32_t options)
11444 {
11445 #ifndef SUPPORT_JIT
11446 
11447 (void)code;
11448 (void)options;
11449 return PCRE2_ERROR_JIT_BADOPTION;
11450 
11451 #else  /* SUPPORT_JIT */
11452 
11453 pcre2_real_code *re = (pcre2_real_code *)code;
11454 executable_functions *functions;
11455 int result;
11456 
11457 if (code == NULL)
11458   return PCRE2_ERROR_NULL;
11459 
11460 if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
11461   return PCRE2_ERROR_JIT_BADOPTION;
11462 
11463 if ((re->flags & PCRE2_NOJIT) != 0) return 0;
11464 
11465 functions = (executable_functions *)re->executable_jit;
11466 
11467 if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
11468     || functions->executable_funcs[0] == NULL)) {
11469   result = jit_compile(code, PCRE2_JIT_COMPLETE);
11470   if (result != 0)
11471     return result;
11472   }
11473 
11474 if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
11475     || functions->executable_funcs[1] == NULL)) {
11476   result = jit_compile(code, PCRE2_JIT_PARTIAL_SOFT);
11477   if (result != 0)
11478     return result;
11479   }
11480 
11481 if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
11482     || functions->executable_funcs[2] == NULL)) {
11483   result = jit_compile(code, PCRE2_JIT_PARTIAL_HARD);
11484   if (result != 0)
11485     return result;
11486   }
11487 
11488 return 0;
11489 
11490 #endif  /* SUPPORT_JIT */
11491 }
11492 
11493 /* JIT compiler uses an all-in-one approach. This improves security,
11494    since the code generator functions are not exported. */
11495 
11496 #define INCLUDED_FROM_PCRE2_JIT_COMPILE
11497 
11498 #include "pcre2_jit_match.c"
11499 #include "pcre2_jit_misc.c"
11500 
11501 /* End of pcre2_jit_compile.c */
11502