• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10           New API code Copyright (c) 2016-2018 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 #ifdef HAVE_CONFIG_H
42 #include "config.h"
43 #endif
44 
45 #include "pcre2_internal.h"
46 
47 #ifdef SUPPORT_JIT
48 
49 /* All-in-one: Since we use the JIT compiler only from here,
50 we just include it. This way we don't need to touch the build
51 system files. */
52 
/* sljit configuration; these must be defined before sljit/sljitLir.c is
included further down in this file. */
53 #define SLJIT_CONFIG_AUTO 1
54 #define SLJIT_CONFIG_STATIC 1
55 #define SLJIT_VERBOSE 0
56 
/* SLJIT_DEBUG is 1 exactly when PCRE2_DEBUG is defined. */
57 #ifdef PCRE2_DEBUG
58 #define SLJIT_DEBUG 1
59 #else
60 #define SLJIT_DEBUG 0
61 #endif
62 
/* Route sljit's allocation hooks to the pcre2_jit_malloc / pcre2_jit_free
helpers defined right after these macros. */
63 #define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
64 #define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
65 
pcre2_jit_malloc(size_t size,void * allocator_data)66 static void * pcre2_jit_malloc(size_t size, void *allocator_data)
67 {
68 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
69 return allocator->malloc(size, allocator->memory_data);
70 }
71 
pcre2_jit_free(void * ptr,void * allocator_data)72 static void pcre2_jit_free(void *ptr, void *allocator_data)
73 {
74 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
75 allocator->free(ptr, allocator->memory_data);
76 }
77 
78 #include "sljit/sljitLir.c"
79 
80 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
81 #error Unsupported architecture
82 #endif
83 
84 /* Defines for debugging purposes. */
85 
86 /* 1 - Use unoptimized capturing brackets.
87    2 - Enable capture_last_ptr (includes option 1). */
88 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
89 
90 /* 1 - Always have a control head. */
91 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
92 
93 /* Allocate memory for the regex stack on the real machine stack.
94 Fast, but limited size. */
95 #define MACHINE_STACK_SIZE 32768
96 
97 /* Growth rate for stack allocated by the OS. Should be a multiple
98 of the page size. */
99 #define STACK_GROWTH_RATE 8192
100 
101 /* Enable to check that the allocation could destroy temporaries.
    Only defined when SLJIT_DEBUG is defined and non-zero. */
102 #if defined SLJIT_DEBUG && SLJIT_DEBUG
103 #define DESTROY_REGISTERS 1
104 #endif
105 
106 /*
107 Short summary of the backtracking mechanism employed by the JIT code generator:
108 
109 The code generator follows the recursive nature of the PERL compatible regular
110 expressions. The basic blocks of regular expressions are condition checkers
111 which execute different commands depending on the result of the condition check.
112 The relationship between the operators can be horizontal (concatenation) and
113 vertical (sub-expression) (See struct backtrack_common for more details).
114 
115   'ab' - 'a' and 'b' regexps are concatenated
116   'a+' - 'a' is the sub-expression of the '+' operator
117 
118 The condition checkers are boolean (true/false) checkers. Machine code is generated
119 for the checker itself and for the actions depending on the result of the checker.
120 The 'true' case is called the matching path (expected path), and the other is called
121 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
122 branches on the matching path.
123 
124  Greedy star operator (*) :
125    Matching path: match happens.
126    Backtrack path: match failed.
127  Non-greedy star operator (*?) :
128    Matching path: no need to perform a match.
129    Backtrack path: match is required.
130 
131 The following example shows how code is generated for a capturing bracket
132 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
133 we have the following regular expression:
134 
135    A(B|C)D
136 
137 The generated code will be the following:
138 
139  A matching path
140  '(' matching path (pushing arguments to the stack)
141  B matching path
142  ')' matching path (pushing arguments to the stack)
143  D matching path
144  return with successful match
145 
146  D backtrack path
147  ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
148  B backtrack path
149  C expected path
150  jump to D matching path
151  C backtrack path
152  A backtrack path
153 
154  Notice that the order of the backtrack code paths is the opposite of the fast
155  code paths. In this way the topmost value on the stack always belongs
156  to the current backtrack code path. The backtrack path must check
157  whether there is a next alternative. If so, it needs to jump back to
158  the matching path eventually. Otherwise it needs to clear out its own stack
159  frame and continue the execution on the backtrack code paths.
160 */
161 
162 /*
163 Saved stack frames:
164 
165 Atomic blocks and asserts require reloading the values of private data
166 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
167 are not necessarily known at compile time, thus we need a dynamic restore
168 mechanism.
169 
170 The stack frames are stored in a chain list, and have the following format:
171 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
172 
173 Thus we can restore the private data to a particular point in the stack.
174 */
175 
/* Argument block for the generated code: the jit_function type declared
further down takes a pointer to one of these. Pointer-typed members are
grouped ahead of the integer-typed ones. */
176 typedef struct jit_arguments {
177   /* Pointers first. */
178   struct sljit_stack *stack;
179   PCRE2_SPTR str;
180   PCRE2_SPTR begin;
181   PCRE2_SPTR end;
182   pcre2_match_data *match_data;
183   PCRE2_SPTR startchar_ptr;
184   PCRE2_UCHAR *mark_ptr;
      /* Callout function and the opaque pointer stored alongside it
      (presumably passed back as the function's second argument - confirm
      against the call site). */
185   int (*callout)(pcre2_callout_block *, void *);
186   void *callout_data;
187   /* Everything else after. */
188   sljit_uw offset_limit;
189   sljit_u32 limit_match;
190   sljit_u32 oveccount;
191   sljit_u32 options;
192 } jit_arguments;
193 
/* Number of slots in each per-mode array of executable_functions. */
194 #define JIT_NUMBER_OF_COMPILE_MODES 3
195 
/* Holds one function pointer, one read-only data list head and one size for
each of the JIT_NUMBER_OF_COMPILE_MODES slots, followed by two sljit_u32
values that are not per-slot. */
196 typedef struct executable_functions {
197   void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
198   void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
199   sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
200   sljit_u32 top_bracket;
201   sljit_u32 limit_match;
202 } executable_functions;
203 
/* Node of a singly linked list of sljit jumps. */
204 typedef struct jump_list {
205   struct sljit_jump *jump;
206   struct jump_list *next;
207 } jump_list;
208 
/* Node of a singly linked list pairing a jump (start) with a label (quit).
NOTE(review): presumably the jump enters an out-of-line stub and the label is
where the stub returns - confirm against the code that fills this list. */
209 typedef struct stub_list {
210   struct sljit_jump *start;
211   struct sljit_label *quit;
212   struct stub_list *next;
213 } stub_list;
214 
/* Node of a singly linked list pairing a label with a pointer to an sljit_uw.
NOTE(review): presumably the label's final address is stored through
update_addr once code generation finishes - confirm. */
215 typedef struct label_addr_list {
216   struct sljit_label *label;
217   sljit_uw *update_addr;
218   struct label_addr_list *next;
219 } label_addr_list;
220 
/* Negative sentinel values. NOTE(review): presumably stored in the framesize
members of the backtrack structures, which are documented as being less than
0 when no frame is needed - confirm against the users. */
221 enum frame_types {
222   no_frame = -1,
223   no_stack = -2
224 };
225 
/* Tag values. NOTE(review): presumably identify entries of the control verb
chain (see control_head_ptr in compiler_common) - confirm. */
226 enum control_types {
227   type_mark = 0,
228   type_then_trap = 1
229 };
230 
/* Function pointer type taking a jit_arguments block and returning int.
SLJIT_FUNC is supplied by sljit (presumably a calling-convention decoration). */
231 typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
232 
233 /* The following structure is the key data type for the recursive
234 code generator. It is allocated by compile_matchingpath, and contains
235 the arguments for compile_backtrackingpath. Must be the first member
236 of its descendants. */
237 typedef struct backtrack_common {
238   /* Concatenation stack. */
239   struct backtrack_common *prev;
240   jump_list *nextbacktracks;
241   /* Internal stack (for component operators). */
242   struct backtrack_common *top;
243   jump_list *topbacktracks;
244   /* Opcode pointer. */
245   PCRE2_SPTR cc;
246 } backtrack_common;
247 
/* Backtrack record for assertions; starts with a backtrack_common. */
248 typedef struct assert_backtrack {
249   backtrack_common common;
250   jump_list *condfailed;
251   /* Less than 0 if a frame is not needed. */
252   int framesize;
253   /* Points to our private memory word on the stack. */
254   int private_data_ptr;
255   /* For iterators. */
256   struct sljit_label *matchingpath;
257 } assert_backtrack;
258 
/* Backtrack record for brackets; starts with a backtrack_common. Only one
member of the union u is meaningful at a time, selected by the opcode as the
member comments describe. */
259 typedef struct bracket_backtrack {
260   backtrack_common common;
261   /* Where to continue if an alternative is successfully matched. */
262   struct sljit_label *alternative_matchingpath;
263   /* For rmin and rmax iterators. */
264   struct sljit_label *recursive_matchingpath;
265   /* For greedy ? operator. */
266   struct sljit_label *zero_matchingpath;
267   /* Contains the branches of a failed condition. */
268   union {
269     /* Both for OP_COND, OP_SCOND. */
270     jump_list *condfailed;
271     assert_backtrack *assert;
272     /* For OP_ONCE. Less than 0 if not needed. */
273     int framesize;
274   } u;
275   /* Points to our private memory word on the stack. */
276   int private_data_ptr;
277 } bracket_backtrack;
278 
/* Backtrack record whose name suggests the possessive bracket opcodes
(OP_*POS) - confirm; starts with a backtrack_common. */
279 typedef struct bracketpos_backtrack {
280   backtrack_common common;
281   /* Points to our private memory word on the stack. */
282   int private_data_ptr;
283   /* Reverting stack is needed. */
284   int framesize;
285   /* Allocated stack size. */
286   int stacksize;
287 } bracketpos_backtrack;
288 
/* Backtrack record carrying only a matching-path label in addition to the
common header. NOTE(review): the name suggests OP_BRAMINZERO - confirm. */
289 typedef struct braminzero_backtrack {
290   backtrack_common common;
291   struct sljit_label *matchingpath;
292 } braminzero_backtrack;
293 
/* Backtrack record for character iterators; starts with a backtrack_common.
The union u holds either a jump list or the charpos data, never both. */
294 typedef struct char_iterator_backtrack {
295   backtrack_common common;
296   /* Next iteration. */
297   struct sljit_label *matchingpath;
298   union {
299     jump_list *backtracks;
300     struct {
301       unsigned int othercasebit;
302       PCRE2_UCHAR chr;
303       BOOL enabled;
304     } charpos;
305   } u;
306 } char_iterator_backtrack;
307 
/* Backtrack record for reference iterators; starts with a backtrack_common. */
308 typedef struct ref_iterator_backtrack {
309   backtrack_common common;
310   /* Next iteration. */
311   struct sljit_label *matchingpath;
312 } ref_iterator_backtrack;
313 
/* Node of a singly linked list describing one recursively callable pattern
function (see the entries / currententry members of compiler_common). */
314 typedef struct recurse_entry {
315   struct recurse_entry *next;
316   /* Contains the function entry label. */
317   struct sljit_label *entry_label;
318   /* Contains the backtrack label (presumably the target of backtrack_calls - confirm). */
319   struct sljit_label *backtrack_label;
320   /* Collects the entry calls until the function is created. */
321   jump_list *entry_calls;
322   /* Collects the backtrack calls until the function is created. */
323   jump_list *backtrack_calls;
324   /* Points to the starting opcode. */
325   sljit_sw start;
326 } recurse_entry;
327 
/* Backtrack record for recursion; starts with a backtrack_common. */
328 typedef struct recurse_backtrack {
329   backtrack_common common;
330   /* Return to the matching path. */
331   struct sljit_label *matchingpath;
332   /* Recursive pattern. */
333   recurse_entry *entry;
334   /* Pattern is inlined. */
335   BOOL inlined_pattern;
336 } recurse_backtrack;
337 
/* Pseudo opcode with the value OP_TABLE_LENGTH. NOTE(review): presumably one
past the last real opcode so that it cannot collide with one - confirm. */
338 #define OP_THEN_TRAP OP_TABLE_LENGTH
339 
/* Backtrack record for a then trap; starts with a backtrack_common. */
340 typedef struct then_trap_backtrack {
341   backtrack_common common;
342   /* If then_trap is not NULL, this structure contains the real
343   then_trap for the backtracking path. */
344   struct then_trap_backtrack *then_trap;
345   /* Points to the starting opcode. */
346   sljit_sw start;
347   /* Exit point for the then opcodes of this alternative. */
348   jump_list *quit;
349   /* Frame size of the current alternative. */
350   int framesize;
351 } then_trap_backtrack;
352 
/* MAX_DIFF_CHARS is the capacity of fast_forward_char_data.chars.
NOTE(review): MAX_N_CHARS is presumably the number of positions inspected by
the fast-forward scan - confirm against its users. */
353 #define MAX_N_CHARS 12
354 #define MAX_DIFF_CHARS 5
355 
/* Set of characters that may occur at one position. */
356 typedef struct fast_forward_char_data {
357   /* Number of characters in the chars array, 255 for any character. */
358   sljit_u8 count;
359   /* Number of last UTF-8 characters in the chars array. */
360   sljit_u8 last_count;
361   /* Available characters in the current position. */
362   PCRE2_UCHAR chars[MAX_DIFF_CHARS];
363 } fast_forward_char_data;
364 
365 #define MAX_CLASS_RANGE_SIZE 4
366 #define MAX_CLASS_CHARS_SIZE 3
367 
/* State shared by the code generator functions. The helper macros later in
this file (DEFINE_COMPILER, OVECTOR, PRIVATE_DATA, ...) expect a pointer to
this structure named "common" to be in scope. Members whose names end in
_ptr and have type sljit_s32 are documented below as offsets or positions,
not C pointers. */
368 typedef struct compiler_common {
369   /* The sljit generic compiler. */
370   struct sljit_compiler *compiler;
371   /* Compiled regular expression. */
372   pcre2_real_code *re;
373   /* First byte code. */
374   PCRE2_SPTR start;
375   /* Maps private data offset to each opcode. */
376   sljit_s32 *private_data_ptrs;
377   /* Chain list of read-only data ptrs. */
378   void *read_only_data_head;
379   /* Tells whether the capturing bracket is optimized. */
380   sljit_u8 *optimized_cbracket;
381   /* Tells whether the starting offset is a target of then. */
382   sljit_u8 *then_offsets;
383   /* Current position where a THEN must jump. */
384   then_trap_backtrack *then_trap;
385   /* Starting offset of private data for capturing brackets. */
386   sljit_s32 cbra_ptr;
387   /* Output vector starting point. Must be divisible by 2. */
388   sljit_s32 ovector_start;
389   /* Points to the starting character of the current match. */
390   sljit_s32 start_ptr;
391   /* Last known position of the requested byte. */
392   sljit_s32 req_char_ptr;
393   /* Head of the last recursion. */
394   sljit_s32 recursive_head_ptr;
395   /* First inspected character for partial matching.
396      (Needed for avoiding zero length partial matches.) */
397   sljit_s32 start_used_ptr;
398   /* Starting pointer for partial soft matches. */
399   sljit_s32 hit_start;
400   /* Pointer of the match end position. */
401   sljit_s32 match_end_ptr;
402   /* Points to the marked string. */
403   sljit_s32 mark_ptr;
404   /* Recursive control verb management chain. */
405   sljit_s32 control_head_ptr;
406   /* Points to the last matched capture block index. */
407   sljit_s32 capture_last_ptr;
408   /* Fast forward skipping byte code pointer. */
409   PCRE2_SPTR fast_forward_bc_ptr;
410   /* Locals used by fast fail optimization. */
411   sljit_s32 fast_fail_start_ptr;
412   sljit_s32 fast_fail_end_ptr;
413 
414   /* Flipped and lower case tables. */
415   const sljit_u8 *fcc;
416   sljit_sw lcc;
417   /* Mode can be PCRE2_JIT_COMPLETE and others. */
418   int mode;
419   /* TRUE, when minlength is greater than 0.
      NOTE(review): the member name suggests the opposite sense (the pattern
      may match an empty string), and check_opcode_types sets it to TRUE when
      it meets OP_SET_SOM - confirm and reword. */
420   BOOL might_be_empty;
421   /* \K is found in the pattern. */
422   BOOL has_set_som;
423   /* (*SKIP:arg) is found in the pattern. */
424   BOOL has_skip_arg;
425   /* (*THEN) is found in the pattern. */
426   BOOL has_then;
427   /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
428   BOOL has_skip_in_assert_back;
429   /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
430   BOOL local_quit_available;
431   /* Currently in a positive assertion. */
432   BOOL in_positive_assertion;
433   /* Newline control. */
434   int nltype;
435   sljit_u32 nlmax;
436   sljit_u32 nlmin;
437   int newline;
438   int bsr_nltype;
439   sljit_u32 bsr_nlmax;
440   sljit_u32 bsr_nlmin;
441   /* Dollar endonly. */
442   int endonly;
443   /* Tables. */
444   sljit_sw ctypes;
445   /* Named capturing brackets. */
446   PCRE2_SPTR name_table;
447   sljit_sw name_count;
448   sljit_sw name_entry_size;
449 
450   /* Labels and jump lists. */
451   struct sljit_label *partialmatchlabel;
452   struct sljit_label *quit_label;
453   struct sljit_label *abort_label;
454   struct sljit_label *accept_label;
455   struct sljit_label *ff_newline_shortcut;
456   stub_list *stubs;
457   label_addr_list *label_addrs;
458   recurse_entry *entries;
459   recurse_entry *currententry;
460   jump_list *partialmatch;
461   jump_list *quit;
462   jump_list *positive_assertion_quit;
463   jump_list *abort;
464   jump_list *failed_match;
465   jump_list *accept;
466   jump_list *calllimit;
467   jump_list *stackalloc;
468   jump_list *revertframes;
469   jump_list *wordboundary;
470   jump_list *anynewline;
471   jump_list *hspace;
472   jump_list *vspace;
473   jump_list *casefulcmp;
474   jump_list *caselesscmp;
475   jump_list *reset_match;
476   BOOL unset_backref;
477   BOOL alt_circumflex;
      /* The remaining members exist only in Unicode-enabled builds; the UTF
      helper jump lists additionally depend on the code unit width. */
478 #ifdef SUPPORT_UNICODE
479   BOOL utf;
480   BOOL invalid_utf;
481   BOOL use_ucp;
482   /* Points to saving area for iref. */
483   sljit_s32 iref_ptr;
484   jump_list *getucd;
485   jump_list *getucdtype;
486 #if PCRE2_CODE_UNIT_WIDTH == 8
487   jump_list *utfreadchar;
488   jump_list *utfreadtype8;
489   jump_list *utfpeakcharback;
490 #endif
491 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
492   jump_list *utfreadchar_invalid;
493   jump_list *utfreadnewline_invalid;
494   jump_list *utfmoveback_invalid;
495   jump_list *utfpeakcharback_invalid;
496 #endif
497 #endif /* SUPPORT_UNICODE */
498 } compiler_common;
499 
500 /* For byte_sequence_compare. */
501 
/* length and sourcereg are always present. ucharptr and the unions c and oc
exist only when SLJIT_UNALIGNED is defined and non-zero. Both unions have the
same members: the same storage viewed as a 32-bit int, a 16-bit value, or an
array of code units whose element type and count depend on
PCRE2_CODE_UNIT_WIDTH (4 x 8 bits, 2 x 16 bits or 1 x 32 bits).
NOTE(review): c and oc presumably hold the characters and their other-case
forms - confirm against byte_sequence_compare. */
502 typedef struct compare_context {
503   int length;
504   int sourcereg;
505 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
506   int ucharptr;
507   union {
508     sljit_s32 asint;
509     sljit_u16 asushort;
510 #if PCRE2_CODE_UNIT_WIDTH == 8
511     sljit_u8 asbyte;
512     sljit_u8 asuchars[4];
513 #elif PCRE2_CODE_UNIT_WIDTH == 16
514     sljit_u16 asuchars[2];
515 #elif PCRE2_CODE_UNIT_WIDTH == 32
516     sljit_u32 asuchars[1];
517 #endif
518   } c;
519   union {
520     sljit_s32 asint;
521     sljit_u16 asushort;
522 #if PCRE2_CODE_UNIT_WIDTH == 8
523     sljit_u8 asbyte;
524     sljit_u8 asuchars[4];
525 #elif PCRE2_CODE_UNIT_WIDTH == 16
526     sljit_u16 asuchars[2];
527 #elif PCRE2_CODE_UNIT_WIDTH == 32
528     sljit_u32 asuchars[1];
529 #endif
530   } oc;
531 #endif
532 } compare_context;
533 
534 /* Undefine sljit macros. CMP is defined again below, among the
    shortcut macros, as a wrapper around sljit_emit_cmp. */
535 #undef CMP
536 
537 /* Used for accessing the elements of the stack: converts a slot index
    into a byte offset of i * sizeof(sljit_sw), as an int. */
538 #define STACK(i)      ((i) * (int)sizeof(sljit_sw))
539 
/* Register assignment. When sljit declares a preferred shift register it
must be SLJIT_R2 or SLJIT_R3 (anything else is a compile error); TMP2 is
mapped onto that register and TMP3 onto the other one. Without a preference
TMP2 is SLJIT_R2 and TMP3 is SLJIT_R3. */
540 #ifdef SLJIT_PREF_SHIFT_REG
541 #if SLJIT_PREF_SHIFT_REG == SLJIT_R2
542 /* Nothing. */
543 #elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
544 #define SHIFT_REG_IS_R3
545 #else
546 #error "Unsupported shift register"
547 #endif
548 #endif
549 
550 #define TMP1          SLJIT_R0
551 #ifdef SHIFT_REG_IS_R3
552 #define TMP2          SLJIT_R3
553 #define TMP3          SLJIT_R2
554 #else
555 #define TMP2          SLJIT_R2
556 #define TMP3          SLJIT_R3
557 #endif
558 #define STR_PTR       SLJIT_R1
559 #define STR_END       SLJIT_S0
560 #define STACK_TOP     SLJIT_S1
561 #define STACK_LIMIT   SLJIT_S2
562 #define COUNT_MATCH   SLJIT_S3
563 #define ARGUMENTS     SLJIT_S4
564 #define RETURN_ADDR   SLJIT_R4
565 
566 /* Local space layout. The fixed slots below are byte offsets, each one
    sljit_sw wide. */
567 /* These two locals can be used by the current opcode. */
568 #define LOCALS0          (0 * sizeof(sljit_sw))
569 #define LOCALS1          (1 * sizeof(sljit_sw))
570 /* Two local variables for possessive quantifiers (char1 cannot use them). */
571 #define POSSESSIVE0      (2 * sizeof(sljit_sw))
572 #define POSSESSIVE1      (3 * sizeof(sljit_sw))
573 /* Max limit of recursions. */
574 #define LIMIT_MATCH      (4 * sizeof(sljit_sw))
575 /* The output vector is stored on the stack, and contains pointers
576 to characters. The vector data is divided into two groups: the first
577 group contains the start / end character pointers, and the second is
578 the start pointers when the end of the capturing group has not yet reached.
    The macros below read members of a compiler_common pointer named
    "common", which must be in scope where they are used. */
579 #define OVECTOR_START    (common->ovector_start)
580 #define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
581 #define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
582 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
583 
/* Code unit width helpers. MOV_UCHAR is the sljit move opcode matching one
code unit; IN_UCHARS(x) converts a count of code units into bytes.
UCHAR_SHIFT (log2 of the code unit size in bytes) is defined for the 16 and
32 bit widths only, so uses of it must be guarded for the 8 bit width. */
584 #if PCRE2_CODE_UNIT_WIDTH == 8
585 #define MOV_UCHAR  SLJIT_MOV_U8
586 #define IN_UCHARS(x) (x)
587 #elif PCRE2_CODE_UNIT_WIDTH == 16
588 #define MOV_UCHAR  SLJIT_MOV_U16
589 #define UCHAR_SHIFT (1)
590 #define IN_UCHARS(x) ((x) * 2)
591 #elif PCRE2_CODE_UNIT_WIDTH == 32
592 #define MOV_UCHAR  SLJIT_MOV_U32
593 #define UCHAR_SHIFT (2)
594 #define IN_UCHARS(x) ((x) * 4)
595 #else
596 #error Unsupported compiling mode
597 #endif
598 
599 /* Shortcuts. DEFINE_COMPILER declares a local named "compiler" taken from
    common->compiler; every other macro here forwards to the sljit function
    of the matching name and passes that local as the first argument, so
    DEFINE_COMPILER (or an equivalent declaration) must come first.
    JUMPTO and CMPTO emit the jump and bind it to an existing label; JUMPHERE
    binds an existing jump to a label emitted at the current position. */
600 #define DEFINE_COMPILER \
601   struct sljit_compiler *compiler = common->compiler
602 #define OP1(op, dst, dstw, src, srcw) \
603   sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
604 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
605   sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
606 #define LABEL() \
607   sljit_emit_label(compiler)
608 #define JUMP(type) \
609   sljit_emit_jump(compiler, (type))
610 #define JUMPTO(type, label) \
611   sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
612 #define JUMPHERE(jump) \
613   sljit_set_label((jump), sljit_emit_label(compiler))
614 #define SET_LABEL(jump, label) \
615   sljit_set_label((jump), (label))
616 #define CMP(type, src1, src1w, src2, src2w) \
617   sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
618 #define CMPTO(type, src1, src1w, src2, src2w, label) \
619   sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
620 #define OP_FLAGS(op, dst, dstw, type) \
621   sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
622 #define CMOV(type, dst_reg, src, srcw) \
623   sljit_emit_cmov(compiler, (type), (dst_reg), (src), (srcw))
624 #define GET_LOCAL_BASE(dst, dstw, offset) \
625   sljit_get_local_base(compiler, (dst), (dstw), (offset))
626 
/* NOTE(review): READ_CHAR_MAX is presumably the "no upper bound" value for
character reading helpers - confirm against its users. */
627 #define READ_CHAR_MAX 0x7fffffff
628 
/* NOTE(review): presumably sentinel values for invalid and unassigned UTF
characters. INVALID_UTF_CHAR expands to an unparenthesized negative
constant, so take care when using it inside larger expressions. */
629 #define INVALID_UTF_CHAR -1
630 #define UNASSIGNED_UTF_CHAR 888
631 
632 #if defined SUPPORT_UNICODE
633 #if PCRE2_CODE_UNIT_WIDTH == 8
634 
/* Decodes one UTF-8 character starting at ptr, validating it.

   c              lvalue that receives the code point
   ptr            lvalue pointer to the code units; advanced past the
                  character on success
   end            continuation bytes are only read at addresses below end
   invalid_action statement(s) executed when the input is not valid

ptr[0] is read without being compared against end (NOTE(review): callers
presumably guarantee ptr < end - confirm). Accepted lead bytes are 0xc2-0xdf
(two bytes), 0xe0-0xef (three bytes) and 0xf0-0xf4 (four bytes), each followed
by the matching number of 0x80-0xbf bytes. Three byte values below 0x800 or
in 0xd800-0xdfff, and four byte values below 0x10000 or at or above 0x110000,
are rejected; for these range failures ptr has already been advanced when
invalid_action runs. In every invalid case c may hold a partial value. */
635 #define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
636   { \
637   if (ptr[0] <= 0x7f) \
638     c = *ptr++; \
639   else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \
640     { \
641     c = ptr[1] - 0x80; \
642     \
643     if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \
644       { \
645       c |= (ptr[0] - 0xc0) << 6; \
646       ptr += 2; \
647       } \
648     else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \
649       { \
650       c = c << 6 | (ptr[2] - 0x80); \
651       \
652       if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \
653         { \
654         c |= (ptr[0] - 0xe0) << 12; \
655         ptr += 3; \
656         \
657         if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
658           { \
659           invalid_action; \
660           } \
661         } \
662       else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \
663         { \
664         c = c << 6 | (ptr[3] - 0x80); \
665         \
666         if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \
667           { \
668           c |= (ptr[0] - 0xf0) << 18; \
669           ptr += 4; \
670           \
671           if (c >= 0x110000 || c < 0x10000) \
672             { \
673             invalid_action; \
674             } \
675           } \
676         else \
677           { \
678           invalid_action; \
679           } \
680         } \
681       else \
682         { \
683         invalid_action; \
684         } \
685       } \
686     else \
687       { \
688       invalid_action; \
689       } \
690     } \
691   else \
692     { \
693     invalid_action; \
694     } \
695   }
696 
/* Decodes the UTF-8 character that ends immediately before ptr, validating it.

   c              lvalue that receives the code point
   ptr            lvalue pointer to unsigned 8-bit code units; moved back to
                  the first code unit of the character on success
   start          lowest address that may be read
   invalid_action statement(s) executed when the input is not valid

The caller must guarantee ptr > start, because ptr[-1] is read
unconditionally. Each further byte is read only after the corresponding
"ptr - k > start" test. Because decoding walks backwards, the byte nearest to
ptr supplies the lowest six bits and each earlier continuation byte is OR-ed
in six bits higher. Accepted lead bytes are 0xc2-0xdf, 0xe0-0xef and
0xf0-0xf4. Three byte values below 0x800 or in 0xd800-0xdfff, and four byte
values below 0x10000 or at or above 0x110000, are rejected; for these range
failures ptr has already been moved when invalid_action runs. In every invalid
case c may hold a partial value. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  if (ptr[-1] <= 0x7f) \
    c = *--ptr; \
  else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \
    { \
    c = ptr[-1] - 0x80; \
    \
    if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \
      { \
      c |= (ptr[-2] - 0xc0) << 6; \
      ptr -= 2; \
      } \
    else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \
      { \
      c |= (ptr[-2] - 0x80) << 6; \
      \
      if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \
        { \
        c |= (ptr[-3] - 0xe0) << 12; \
        ptr -= 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \
        { \
        c |= (ptr[-3] - 0x80) << 12; \
        \
        if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \
          { \
          c |= (ptr[-4] - 0xf0) << 18; \
          ptr -= 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
758 
759 #elif PCRE2_CODE_UNIT_WIDTH == 16
760 
/* Decodes one UTF-16 character starting at ptr, validating it.

   c              lvalue that receives the code point
   ptr            lvalue pointer to the code units; advanced on success
   end            the low surrogate is only read when ptr + 1 < end
   invalid_action statement(s) executed for an unpaired surrogate; c and ptr
                  are left untouched in that case

ptr[0] is read without being compared against end. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] >= 0xd800 && ptr[0] < 0xe000) \
    { \
    if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \
      { \
      c = (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)) + 0x10000; \
      ptr += 2; \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    c = *ptr++; \
  }
775 
/* Decodes the UTF-16 character that ends immediately before ptr, validating
it.

   c              lvalue that receives the code point
   ptr            lvalue pointer to the code units; moved back to the first
                  code unit of the character on success
   start          ptr[-2] is only read when ptr - 1 > start
   invalid_action statement(s) executed for an unpaired surrogate; c and ptr
                  are left untouched in that case

The caller must guarantee ptr > start, because ptr[-1] is read
unconditionally. A non-surrogate unit is returned as is: the unit before ptr
is the one tested, so it is also the one stored in c (pre-decrement). */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  if (ptr[-1] < 0xd800 || ptr[-1] >= 0xe000) \
    c = *--ptr; \
  else if (ptr[-1] >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \
    { \
    c = (((ptr[-2] - 0xd800) << 10) | (ptr[-1] - 0xdc00)) + 0x10000; \
    ptr -= 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
790 
791 
792 #elif PCRE2_CODE_UNIT_WIDTH == 32
793 
/* Reads one UTF-32 character at ptr, validating it.

   c              lvalue that receives the code point
   ptr            lvalue pointer to the code units; advanced by one on success
   end            unused for this width (kept so that all widths share one
                  macro signature)
   invalid_action statement(s) executed for a surrogate (0xd800-0xdfff) or a
                  value at or above 0x110000; c and ptr are left untouched

Surrogate code points are rejected because they are not valid in UTF-32. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] < 0xd800 || (ptr[0] >= 0xe000 && ptr[0] < 0x110000)) \
    c = *ptr++; \
  else \
    { \
    invalid_action; \
    } \
  }
803 
804 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
805 #endif /* SUPPORT_UNICODE */
806 
/* Returns the address just past the closing KET of the group that starts at
cc. cc must point at an assertion opcode (OP_ASSERT..OP_ASSERTBACK_NOT) or at
a bracket opcode in the range OP_ONCE..OP_SCOND; the link stored at offset 1
is followed across every OP_ALT until the closing opcode is reached. */
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

for (;;)
  {
  cc += GET(cc, 1);
  if (*cc != OP_ALT)
    break;
  }

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return cc + 1 + LINK_SIZE;
}
815 
/* Returns the number of alternatives of the group that starts at cc (at least
1). cc must point at an assertion opcode (OP_ASSERT..OP_ASSERTBACK_NOT) or at
a bracket opcode in the range OP_ONCE..OP_SCOND. */
static int no_alternatives(PCRE2_SPTR cc)
{
int branches;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* The first branch always exists; each OP_ALT found afterwards adds one. */
cc += GET(cc, 1);
for (branches = 1; *cc == OP_ALT; branches++)
  cc += GET(cc, 1);

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return branches;
}
829 
830 /* Functions which might need modification for each newly supported opcode:
831  next_opcode
832  check_opcode_types
833  set_private_data_ptrs
834  get_framesize
835  init_frame
836  get_recurse_data_length
837  copy_recurse_data
838  compile_matchingpath
839  compile_backtrackingpath
840 */
841 
/* Returns the address of the opcode that follows the one at cc.

Four length rules are used:
  - fixed length opcodes: the PRIV(OP_lengths) table entry;
  - single character opcodes: the table entry plus, in UTF mode, the extra
    code units of the character that ends the opcode;
  - OP_TYPE* repeats: the table entry minus one;
  - variable length opcodes (OP_CALLOUT_STR, OP_XCLASS, OP_MARK and the
    verbs with an argument): the length stored inside the opcode.

NULL is returned for OP_ANYBYTE in UTF mode and for any opcode that is not
listed (the latter also triggers SLJIT_UNREACHABLE). common is only read in
builds with SUPPORT_UNICODE. */
static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
const PCRE2_UCHAR opcode = *cc;
PCRE2_SPTR next;

SLJIT_UNUSED_ARG(common);

switch(opcode)
  {
  /* Fixed length opcodes. */
  case OP_SOD:
  case OP_SOM:
  case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  case OP_ANY:
  case OP_ALLANY:
  case OP_NOTPROP:
  case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE:
  case OP_HSPACE:
  case OP_NOT_VSPACE:
  case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN:
  case OP_EOD:
  case OP_CIRC:
  case OP_CIRCM:
  case OP_DOLL:
  case OP_DOLLM:
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  case OP_CRPOSSTAR:
  case OP_CRPOSPLUS:
  case OP_CRPOSQUERY:
  case OP_CRPOSRANGE:
  case OP_CLASS:
  case OP_NCLASS:
  case OP_REF:
  case OP_REFI:
  case OP_DNREF:
  case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET:
  case OP_KETRMAX:
  case OP_KETRMIN:
  case OP_KETRPOS:
  case OP_REVERSE:
  case OP_ASSERT:
  case OP_ASSERT_NOT:
  case OP_ASSERTBACK:
  case OP_ASSERTBACK_NOT:
  case OP_ONCE:
  case OP_SCRIPT_RUN:
  case OP_BRA:
  case OP_BRAPOS:
  case OP_CBRA:
  case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA:
  case OP_SBRAPOS:
  case OP_SCBRA:
  case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF:
  case OP_DNCREF:
  case OP_RREF:
  case OP_DNRREF:
  case OP_FALSE:
  case OP_TRUE:
  case OP_BRAZERO:
  case OP_BRAMINZERO:
  case OP_BRAPOSZERO:
  case OP_PRUNE:
  case OP_SKIP:
  case OP_THEN:
  case OP_COMMIT:
  case OP_FAIL:
  case OP_ACCEPT:
  case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  return cc + PRIV(OP_lengths)[opcode];

  /* Opcodes that end with a single character. */
  case OP_CHAR:
  case OP_CHARI:
  case OP_NOT:
  case OP_NOTI:
  case OP_STAR:
  case OP_MINSTAR:
  case OP_PLUS:
  case OP_MINPLUS:
  case OP_QUERY:
  case OP_MINQUERY:
  case OP_UPTO:
  case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSPLUS:
  case OP_POSQUERY:
  case OP_POSUPTO:
  case OP_STARI:
  case OP_MINSTARI:
  case OP_PLUSI:
  case OP_MINPLUSI:
  case OP_QUERYI:
  case OP_MINQUERYI:
  case OP_UPTOI:
  case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI:
  case OP_POSPLUSI:
  case OP_POSQUERYI:
  case OP_POSUPTOI:
  case OP_NOTSTAR:
  case OP_NOTMINSTAR:
  case OP_NOTPLUS:
  case OP_NOTMINPLUS:
  case OP_NOTQUERY:
  case OP_NOTMINQUERY:
  case OP_NOTUPTO:
  case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR:
  case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY:
  case OP_NOTPOSUPTO:
  case OP_NOTSTARI:
  case OP_NOTMINSTARI:
  case OP_NOTPLUSI:
  case OP_NOTMINPLUSI:
  case OP_NOTQUERYI:
  case OP_NOTMINQUERYI:
  case OP_NOTUPTOI:
  case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI:
  case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI:
  case OP_NOTPOSUPTOI:
  next = cc + PRIV(OP_lengths)[opcode];
#ifdef SUPPORT_UNICODE
  /* The table length covers one code unit of the character; skip the rest. */
  if (common->utf && HAS_EXTRALEN(next[-1]))
    next += GET_EXTRALEN(next[-1]);
#endif
  return next;

  /* Special cases. */
  case OP_TYPESTAR:
  case OP_TYPEMINSTAR:
  case OP_TYPEPLUS:
  case OP_TYPEMINPLUS:
  case OP_TYPEQUERY:
  case OP_TYPEMINQUERY:
  case OP_TYPEUPTO:
  case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR:
  case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY:
  case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[opcode] - 1;

  case OP_ANYBYTE:
#ifdef SUPPORT_UNICODE
  if (common->utf)
    return NULL;
#endif
  return cc + 1;

  /* The total length is stored inside the opcode. */
  case OP_CALLOUT_STR:
  return cc + GET(cc, 1 + 2*LINK_SIZE);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* Opcode, length unit, cc[1] argument units, plus one more unit. */
  case OP_MARK:
  case OP_COMMIT_ARG:
  case OP_PRUNE_ARG:
  case OP_SKIP_ARG:
  case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  default:
  /* All opcodes are supported now! */
  SLJIT_UNREACHABLE();
  return NULL;
  }
}
1041 
check_opcode_types(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend)1042 static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
1043 {
1044 int count;
1045 PCRE2_SPTR slot;
1046 PCRE2_SPTR assert_back_end = cc - 1;
1047 
1048 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
1049 while (cc < ccend)
1050   {
1051   switch(*cc)
1052     {
1053     case OP_SET_SOM:
1054     common->has_set_som = TRUE;
1055     common->might_be_empty = TRUE;
1056     cc += 1;
1057     break;
1058 
1059     case OP_REFI:
1060 #ifdef SUPPORT_UNICODE
1061     if (common->iref_ptr == 0)
1062       {
1063       common->iref_ptr = common->ovector_start;
1064       common->ovector_start += 3 * sizeof(sljit_sw);
1065       }
1066 #endif /* SUPPORT_UNICODE */
1067     /* Fall through. */
1068     case OP_REF:
1069     common->optimized_cbracket[GET2(cc, 1)] = 0;
1070     cc += 1 + IMM2_SIZE;
1071     break;
1072 
1073     case OP_CBRAPOS:
1074     case OP_SCBRAPOS:
1075     common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
1076     cc += 1 + LINK_SIZE + IMM2_SIZE;
1077     break;
1078 
1079     case OP_COND:
1080     case OP_SCOND:
1081     /* Only AUTO_CALLOUT can insert this opcode. We do
1082        not intend to support this case. */
1083     if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
1084       return FALSE;
1085     cc += 1 + LINK_SIZE;
1086     break;
1087 
1088     case OP_CREF:
1089     common->optimized_cbracket[GET2(cc, 1)] = 0;
1090     cc += 1 + IMM2_SIZE;
1091     break;
1092 
1093     case OP_DNREF:
1094     case OP_DNREFI:
1095     case OP_DNCREF:
1096     count = GET2(cc, 1 + IMM2_SIZE);
1097     slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
1098     while (count-- > 0)
1099       {
1100       common->optimized_cbracket[GET2(slot, 0)] = 0;
1101       slot += common->name_entry_size;
1102       }
1103     cc += 1 + 2 * IMM2_SIZE;
1104     break;
1105 
1106     case OP_RECURSE:
1107     /* Set its value only once. */
1108     if (common->recursive_head_ptr == 0)
1109       {
1110       common->recursive_head_ptr = common->ovector_start;
1111       common->ovector_start += sizeof(sljit_sw);
1112       }
1113     cc += 1 + LINK_SIZE;
1114     break;
1115 
1116     case OP_CALLOUT:
1117     case OP_CALLOUT_STR:
1118     if (common->capture_last_ptr == 0)
1119       {
1120       common->capture_last_ptr = common->ovector_start;
1121       common->ovector_start += sizeof(sljit_sw);
1122       }
1123     cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
1124     break;
1125 
1126     case OP_ASSERTBACK:
1127     slot = bracketend(cc);
1128     if (slot > assert_back_end)
1129       assert_back_end = slot;
1130     cc += 1 + LINK_SIZE;
1131     break;
1132 
1133     case OP_THEN_ARG:
1134     common->has_then = TRUE;
1135     common->control_head_ptr = 1;
1136     /* Fall through. */
1137 
1138     case OP_COMMIT_ARG:
1139     case OP_PRUNE_ARG:
1140     case OP_MARK:
1141     if (common->mark_ptr == 0)
1142       {
1143       common->mark_ptr = common->ovector_start;
1144       common->ovector_start += sizeof(sljit_sw);
1145       }
1146     cc += 1 + 2 + cc[1];
1147     break;
1148 
1149     case OP_THEN:
1150     common->has_then = TRUE;
1151     common->control_head_ptr = 1;
1152     cc += 1;
1153     break;
1154 
1155     case OP_SKIP:
1156     if (cc < assert_back_end)
1157       common->has_skip_in_assert_back = TRUE;
1158     cc += 1;
1159     break;
1160 
1161     case OP_SKIP_ARG:
1162     common->control_head_ptr = 1;
1163     common->has_skip_arg = TRUE;
1164     if (cc < assert_back_end)
1165       common->has_skip_in_assert_back = TRUE;
1166     cc += 1 + 2 + cc[1];
1167     break;
1168 
1169     default:
1170     cc = next_opcode(common, cc);
1171     if (cc == NULL)
1172       return FALSE;
1173     break;
1174     }
1175   }
1176 return TRUE;
1177 }
1178 
is_accelerated_repeat(PCRE2_SPTR cc)1179 static BOOL is_accelerated_repeat(PCRE2_SPTR cc)
1180 {
1181 switch(*cc)
1182   {
1183   case OP_TYPESTAR:
1184   case OP_TYPEMINSTAR:
1185   case OP_TYPEPLUS:
1186   case OP_TYPEMINPLUS:
1187   case OP_TYPEPOSSTAR:
1188   case OP_TYPEPOSPLUS:
1189   return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);
1190 
1191   case OP_STAR:
1192   case OP_MINSTAR:
1193   case OP_PLUS:
1194   case OP_MINPLUS:
1195   case OP_POSSTAR:
1196   case OP_POSPLUS:
1197 
1198   case OP_STARI:
1199   case OP_MINSTARI:
1200   case OP_PLUSI:
1201   case OP_MINPLUSI:
1202   case OP_POSSTARI:
1203   case OP_POSPLUSI:
1204 
1205   case OP_NOTSTAR:
1206   case OP_NOTMINSTAR:
1207   case OP_NOTPLUS:
1208   case OP_NOTMINPLUS:
1209   case OP_NOTPOSSTAR:
1210   case OP_NOTPOSPLUS:
1211 
1212   case OP_NOTSTARI:
1213   case OP_NOTMINSTARI:
1214   case OP_NOTPLUSI:
1215   case OP_NOTMINPLUSI:
1216   case OP_NOTPOSSTARI:
1217   case OP_NOTPOSPLUSI:
1218   return TRUE;
1219 
1220   case OP_CLASS:
1221   case OP_NCLASS:
1222 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
1223   case OP_XCLASS:
1224   cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(PCRE2_UCHAR)));
1225 #else
1226   cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
1227 #endif
1228 
1229   switch(*cc)
1230     {
1231     case OP_CRSTAR:
1232     case OP_CRMINSTAR:
1233     case OP_CRPLUS:
1234     case OP_CRMINPLUS:
1235     case OP_CRPOSSTAR:
1236     case OP_CRPOSPLUS:
1237     return TRUE;
1238     }
1239   break;
1240   }
1241 return FALSE;
1242 }
1243 
detect_fast_forward_skip(compiler_common * common,int * private_data_start)1244 static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
1245 {
1246 PCRE2_SPTR cc = common->start;
1247 PCRE2_SPTR end;
1248 
1249 /* Skip not repeated brackets. */
1250 while (TRUE)
1251   {
1252   switch(*cc)
1253     {
1254     case OP_SOD:
1255     case OP_SOM:
1256     case OP_SET_SOM:
1257     case OP_NOT_WORD_BOUNDARY:
1258     case OP_WORD_BOUNDARY:
1259     case OP_EODN:
1260     case OP_EOD:
1261     case OP_CIRC:
1262     case OP_CIRCM:
1263     case OP_DOLL:
1264     case OP_DOLLM:
1265     /* Zero width assertions. */
1266     cc++;
1267     continue;
1268     }
1269 
1270   if (*cc != OP_BRA && *cc != OP_CBRA)
1271     break;
1272 
1273   end = cc + GET(cc, 1);
1274   if (*end != OP_KET || PRIVATE_DATA(end) != 0)
1275     return FALSE;
1276   if (*cc == OP_CBRA)
1277     {
1278     if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1279       return FALSE;
1280     cc += IMM2_SIZE;
1281     }
1282   cc += 1 + LINK_SIZE;
1283   }
1284 
1285 if (is_accelerated_repeat(cc))
1286   {
1287   common->fast_forward_bc_ptr = cc;
1288   common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1289   *private_data_start += sizeof(sljit_sw);
1290   return TRUE;
1291   }
1292 return FALSE;
1293 }
1294 
detect_fast_fail(compiler_common * common,PCRE2_SPTR cc,int * private_data_start,sljit_s32 depth)1295 static SLJIT_INLINE void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth)
1296 {
1297   PCRE2_SPTR next_alt;
1298 
1299   SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
1300 
1301   if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1302     return;
1303 
1304   next_alt = bracketend(cc) - (1 + LINK_SIZE);
1305   if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
1306     return;
1307 
1308   do
1309     {
1310     next_alt = cc + GET(cc, 1);
1311 
1312     cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1313 
1314     while (TRUE)
1315       {
1316       switch(*cc)
1317         {
1318         case OP_SOD:
1319         case OP_SOM:
1320         case OP_SET_SOM:
1321         case OP_NOT_WORD_BOUNDARY:
1322         case OP_WORD_BOUNDARY:
1323         case OP_EODN:
1324         case OP_EOD:
1325         case OP_CIRC:
1326         case OP_CIRCM:
1327         case OP_DOLL:
1328         case OP_DOLLM:
1329         /* Zero width assertions. */
1330         cc++;
1331         continue;
1332         }
1333       break;
1334       }
1335 
1336     if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
1337       detect_fast_fail(common, cc, private_data_start, depth - 1);
1338 
1339     if (is_accelerated_repeat(cc))
1340       {
1341       common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1342 
1343       if (common->fast_fail_start_ptr == 0)
1344         common->fast_fail_start_ptr = *private_data_start;
1345 
1346       *private_data_start += sizeof(sljit_sw);
1347       common->fast_fail_end_ptr = *private_data_start;
1348 
1349       if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1350         return;
1351       }
1352 
1353     cc = next_alt;
1354     }
1355   while (*cc == OP_ALT);
1356 }
1357 
/* Returns the number of private data words (0, 1 or 2) used by the class
repeat opcode at cc. Opcodes which are not listed below yield 0. */
static int get_class_iterator_size(PCRE2_SPTR cc)
{
sljit_u32 lower;
sljit_u32 upper;
sljit_u32 span;

switch(*cc)
  {
  case OP_CRMINSTAR:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  return 1;

  case OP_CRSTAR:
  case OP_CRPLUS:
  return 2;

  case OP_CRRANGE:
  case OP_CRMINRANGE:
  /* Computed below. */
  break;

  default:
  return 0;
  }

lower = GET2(cc, 1);
upper = GET2(cc, 1 + IMM2_SIZE);

/* A zero upper limit gets the same size as the star variants. */
if (upper == 0)
  {
  if (*cc == OP_CRRANGE)
    return 2;
  return 1;
  }

/* Otherwise the size is the difference of the limits, capped at two. */
span = upper - lower;
return (span > 2) ? 2 : (int)span;
}
1389 
detect_repeat(compiler_common * common,PCRE2_SPTR begin)1390 static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
1391 {
1392 PCRE2_SPTR end = bracketend(begin);
1393 PCRE2_SPTR next;
1394 PCRE2_SPTR next_end;
1395 PCRE2_SPTR max_end;
1396 PCRE2_UCHAR type;
1397 sljit_sw length = end - begin;
1398 sljit_s32 min, max, i;
1399 
1400 /* Detect fixed iterations first. */
1401 if (end[-(1 + LINK_SIZE)] != OP_KET)
1402   return FALSE;
1403 
1404 /* Already detected repeat. */
1405 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
1406   return TRUE;
1407 
1408 next = end;
1409 min = 1;
1410 while (1)
1411   {
1412   if (*next != *begin)
1413     break;
1414   next_end = bracketend(next);
1415   if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1416     break;
1417   next = next_end;
1418   min++;
1419   }
1420 
1421 if (min == 2)
1422   return FALSE;
1423 
1424 max = 0;
1425 max_end = next;
1426 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1427   {
1428   type = *next;
1429   while (1)
1430     {
1431     if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1432       break;
1433     next_end = bracketend(next + 2 + LINK_SIZE);
1434     if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1435       break;
1436     next = next_end;
1437     max++;
1438     }
1439 
1440   if (next[0] == type && next[1] == *begin && max >= 1)
1441     {
1442     next_end = bracketend(next + 1);
1443     if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1444       {
1445       for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1446         if (*next_end != OP_KET)
1447           break;
1448 
1449       if (i == max)
1450         {
1451         common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1452         common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1453         /* +2 the original and the last. */
1454         common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1455         if (min == 1)
1456           return TRUE;
1457         min--;
1458         max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1459         }
1460       }
1461     }
1462   }
1463 
1464 if (min >= 3)
1465   {
1466   common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1467   common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1468   common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1469   return TRUE;
1470   }
1471 
1472 return FALSE;
1473 }
1474 
/* Case label lists for switch statements which process single character and
character type iterators. In set_private_data_ptrs() the _1 lists reserve one
private word and the _2A / _2B lists reserve two; the _2B opcodes also carry
an IMM2_SIZE repeat count. */

/* Single character: minimizing star / plus and both query forms. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINSTARI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINSTARI: \
    case OP_MINPLUS: \
    case OP_MINPLUSI: \
    case OP_NOTMINPLUS: \
    case OP_NOTMINPLUSI: \
    case OP_QUERY: \
    case OP_QUERYI: \
    case OP_NOTQUERY: \
    case OP_NOTQUERYI: \
    case OP_MINQUERY: \
    case OP_MINQUERYI: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINQUERYI:

/* Single character: greedy star / plus. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_STARI: \
    case OP_NOTSTAR: \
    case OP_NOTSTARI: \
    case OP_PLUS: \
    case OP_PLUSI: \
    case OP_NOTPLUS: \
    case OP_NOTPLUSI:

/* Single character: greedy and minimizing upto. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_UPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTUPTOI: \
    case OP_MINUPTO: \
    case OP_MINUPTOI: \
    case OP_NOTMINUPTO: \
    case OP_NOTMINUPTOI:

/* Character type: minimizing star / plus and both query forms. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Character type: greedy star / plus. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* Character type: greedy and minimizing upto. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1526 
/* Assigns private data offsets to the opcodes which need them. The offsets
are stored in common->private_data_ptrs, indexed by the position of the opcode
relative to common->start.

  common              compiler state; private_data_ptrs is updated
  private_data_start  in: first free offset, out: first free offset after
                      the assignments made here
  ccend               end of the byte code to be processed

The scan stops early once the next free offset exceeds SLJIT_MAX_LOCAL_SIZE;
*private_data_start is still updated in that case. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
PCRE2_SPTR cc = common->start;
PCRE2_SPTR alternative;
/* End of the bracket inside which iterators must not get a private slot
(NULL when there is no such bracket). */
PCRE2_SPTR end = NULL;
int private_data_ptr = *private_data_start;
/* space:      number of private words needed by an iterator
   size:       length of the current opcode; a negative value means that
               the opcode ends with a character, which may have extra code
               units in UTF mode
   bracketlen: length of a bracket header, or 0 for other opcodes */
int space, size, bracketlen;
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  space = 0;
  size = 0;
  bracketlen = 0;
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
    {
    if (detect_repeat(common, cc))
      {
      /* These brackets are converted to repeats, so no global
      based single character repeat is allowed. */
      if (cc >= end)
        end = bracketend(cc);
      }
    }
  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry after the OP_KET is the length recorded by
    detect_repeat(): the OP_KET gets a slot and the copies are skipped. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    /* The bracket which follows is not checked by detect_repeat(). */
    repeat_check = FALSE;
    size = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    space = 1;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    space = 2;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    space = 2;
    size = -(2 + IMM2_SIZE);
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    space = 1;
    size = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    size = 1;
    break;

    case OP_TYPEUPTO:
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    size = 1 + IMM2_SIZE;
    break;

    case OP_TYPEMINUPTO:
    space = 2;
    size = 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* The length must be known first: the repeat opcode, which decides
    the number of private words, follows the class. Computing it after
    the call would inspect the class opcode itself and always yield 0. */
    size = 1 + 32 / sizeof(PCRE2_UCHAR);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    /* Same as above, but the length is stored in the opcode. */
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
     get a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  if (size != 0)
    {
    if (size < 0)
      {
      cc += -size;
#ifdef SUPPORT_UNICODE
      /* Skip the remaining code units of a multi-unit character. */
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    if (cc >= end)
      {
      /* Brackets closed by a plain OP_KET do not restrict iterators. */
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
*private_data_start = private_data_ptr;
}
1707 
1708 /* Returns with a frame_types (always < 0) if no need for frame. */
get_framesize(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,BOOL recursive,BOOL * needs_control_head)1709 static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
1710 {
1711 int length = 0;
1712 int possessive = 0;
1713 BOOL stack_restore = FALSE;
1714 BOOL setsom_found = recursive;
1715 BOOL setmark_found = recursive;
1716 /* The last capture is a local variable even for recursions. */
1717 BOOL capture_last_found = FALSE;
1718 
1719 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1720 SLJIT_ASSERT(common->control_head_ptr != 0);
1721 *needs_control_head = TRUE;
1722 #else
1723 *needs_control_head = FALSE;
1724 #endif
1725 
1726 if (ccend == NULL)
1727   {
1728   ccend = bracketend(cc) - (1 + LINK_SIZE);
1729   if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1730     {
1731     possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1732     /* This is correct regardless of common->capture_last_ptr. */
1733     capture_last_found = TRUE;
1734     }
1735   cc = next_opcode(common, cc);
1736   }
1737 
1738 SLJIT_ASSERT(cc != NULL);
1739 while (cc < ccend)
1740   switch(*cc)
1741     {
1742     case OP_SET_SOM:
1743     SLJIT_ASSERT(common->has_set_som);
1744     stack_restore = TRUE;
1745     if (!setsom_found)
1746       {
1747       length += 2;
1748       setsom_found = TRUE;
1749       }
1750     cc += 1;
1751     break;
1752 
1753     case OP_MARK:
1754     case OP_COMMIT_ARG:
1755     case OP_PRUNE_ARG:
1756     case OP_THEN_ARG:
1757     SLJIT_ASSERT(common->mark_ptr != 0);
1758     stack_restore = TRUE;
1759     if (!setmark_found)
1760       {
1761       length += 2;
1762       setmark_found = TRUE;
1763       }
1764     if (common->control_head_ptr != 0)
1765       *needs_control_head = TRUE;
1766     cc += 1 + 2 + cc[1];
1767     break;
1768 
1769     case OP_RECURSE:
1770     stack_restore = TRUE;
1771     if (common->has_set_som && !setsom_found)
1772       {
1773       length += 2;
1774       setsom_found = TRUE;
1775       }
1776     if (common->mark_ptr != 0 && !setmark_found)
1777       {
1778       length += 2;
1779       setmark_found = TRUE;
1780       }
1781     if (common->capture_last_ptr != 0 && !capture_last_found)
1782       {
1783       length += 2;
1784       capture_last_found = TRUE;
1785       }
1786     cc += 1 + LINK_SIZE;
1787     break;
1788 
1789     case OP_CBRA:
1790     case OP_CBRAPOS:
1791     case OP_SCBRA:
1792     case OP_SCBRAPOS:
1793     stack_restore = TRUE;
1794     if (common->capture_last_ptr != 0 && !capture_last_found)
1795       {
1796       length += 2;
1797       capture_last_found = TRUE;
1798       }
1799     length += 3;
1800     cc += 1 + LINK_SIZE + IMM2_SIZE;
1801     break;
1802 
1803     case OP_THEN:
1804     stack_restore = TRUE;
1805     if (common->control_head_ptr != 0)
1806       *needs_control_head = TRUE;
1807     cc ++;
1808     break;
1809 
1810     default:
1811     stack_restore = TRUE;
1812     /* Fall through. */
1813 
1814     case OP_NOT_WORD_BOUNDARY:
1815     case OP_WORD_BOUNDARY:
1816     case OP_NOT_DIGIT:
1817     case OP_DIGIT:
1818     case OP_NOT_WHITESPACE:
1819     case OP_WHITESPACE:
1820     case OP_NOT_WORDCHAR:
1821     case OP_WORDCHAR:
1822     case OP_ANY:
1823     case OP_ALLANY:
1824     case OP_ANYBYTE:
1825     case OP_NOTPROP:
1826     case OP_PROP:
1827     case OP_ANYNL:
1828     case OP_NOT_HSPACE:
1829     case OP_HSPACE:
1830     case OP_NOT_VSPACE:
1831     case OP_VSPACE:
1832     case OP_EXTUNI:
1833     case OP_EODN:
1834     case OP_EOD:
1835     case OP_CIRC:
1836     case OP_CIRCM:
1837     case OP_DOLL:
1838     case OP_DOLLM:
1839     case OP_CHAR:
1840     case OP_CHARI:
1841     case OP_NOT:
1842     case OP_NOTI:
1843 
1844     case OP_EXACT:
1845     case OP_POSSTAR:
1846     case OP_POSPLUS:
1847     case OP_POSQUERY:
1848     case OP_POSUPTO:
1849 
1850     case OP_EXACTI:
1851     case OP_POSSTARI:
1852     case OP_POSPLUSI:
1853     case OP_POSQUERYI:
1854     case OP_POSUPTOI:
1855 
1856     case OP_NOTEXACT:
1857     case OP_NOTPOSSTAR:
1858     case OP_NOTPOSPLUS:
1859     case OP_NOTPOSQUERY:
1860     case OP_NOTPOSUPTO:
1861 
1862     case OP_NOTEXACTI:
1863     case OP_NOTPOSSTARI:
1864     case OP_NOTPOSPLUSI:
1865     case OP_NOTPOSQUERYI:
1866     case OP_NOTPOSUPTOI:
1867 
1868     case OP_TYPEEXACT:
1869     case OP_TYPEPOSSTAR:
1870     case OP_TYPEPOSPLUS:
1871     case OP_TYPEPOSQUERY:
1872     case OP_TYPEPOSUPTO:
1873 
1874     case OP_CLASS:
1875     case OP_NCLASS:
1876     case OP_XCLASS:
1877 
1878     case OP_CALLOUT:
1879     case OP_CALLOUT_STR:
1880 
1881     cc = next_opcode(common, cc);
1882     SLJIT_ASSERT(cc != NULL);
1883     break;
1884     }
1885 
1886 /* Possessive quantifiers can use a special case. */
1887 if (SLJIT_UNLIKELY(possessive == length))
1888   return stack_restore ? no_frame : no_stack;
1889 
1890 if (length > 0)
1891   return length + 1;
1892 return stack_restore ? no_frame : no_stack;
1893 }
1894 
/* Emits the two stores which save a local variable into the frame: first the
negated offset of the variable, then its current value (loaded through TMP1).
stackpos is decreased by one word after each store. */
#define INIT_FRAME_PUSH_LOCAL(local_offset) \
  do \
    { \
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), (local_offset)); \
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -(local_offset)); \
    stackpos -= (int)sizeof(sljit_sw); \
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); \
    stackpos -= (int)sizeof(sljit_sw); \
    } \
  while (0)

/* Generates the code which fills the frame for the byte code range
[cc, ccend). When ccend is NULL the range is the bracket starting at cc. The
frame is written downwards from stack position stackpos and is closed by a
zero word; the final position must be equal to STACK(stacktop). */
static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
{
DEFINE_COMPILER;
BOOL som_saved = FALSE;
BOOL mark_saved = FALSE;
/* The last capture is a local variable even for recursions. */
BOOL last_capture_saved = FALSE;
int ovector_index;

/* >= 1 + shortest item size (2) */
SLJIT_UNUSED_ARG(stacktop);
SLJIT_ASSERT(stackpos >= stacktop + 2);

stackpos = STACK(stackpos);
if (ccend == NULL)
  {
  ccend = bracketend(cc) - (1 + LINK_SIZE);
  /* The header of a possessive capturing bracket is processed as well. */
  if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
    cc = next_opcode(common, cc);
  }

SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    if (!som_saved)
      {
      INIT_FRAME_PUSH_LOCAL(OVECTOR(0));
      som_saved = TRUE;
      }
    cc++;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    if (!mark_saved)
      {
      INIT_FRAME_PUSH_LOCAL(common->mark_ptr);
      mark_saved = TRUE;
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_RECURSE:
    /* Every local variable in use is saved, unless it is saved already. */
    if (common->has_set_som && !som_saved)
      {
      INIT_FRAME_PUSH_LOCAL(OVECTOR(0));
      som_saved = TRUE;
      }
    if (common->mark_ptr != 0 && !mark_saved)
      {
      INIT_FRAME_PUSH_LOCAL(common->mark_ptr);
      mark_saved = TRUE;
      }
    if (common->capture_last_ptr != 0 && !last_capture_saved)
      {
      INIT_FRAME_PUSH_LOCAL(common->capture_last_ptr);
      last_capture_saved = TRUE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    if (common->capture_last_ptr != 0 && !last_capture_saved)
      {
      INIT_FRAME_PUSH_LOCAL(common->capture_last_ptr);
      last_capture_saved = TRUE;
      }
    /* Three words: the ovector offset and the two ovector values. */
    ovector_index = (GET2(cc, 1 + LINK_SIZE)) << 1;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(ovector_index));
    stackpos -= (int)sizeof(sljit_sw);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
    stackpos -= (int)sizeof(sljit_sw);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
    stackpos -= (int)sizeof(sljit_sw);

    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }

/* The zero word closes the frame. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
SLJIT_ASSERT(stackpos == STACK(stacktop));
}

#undef INIT_FRAME_PUSH_LOCAL
2017 
/* Number of temporary registers used in rotation by the delayed memory
copy helpers. */
#define RECURSE_TMP_REG_COUNT 3

/* State of a sequence of delayed memory to memory copies. A value is loaded
into a temporary register first, and it is written to its destination only
when the same register is needed again or when the sequence is finished. */
struct delayed_mem_copy_status {
  /* Compiler which receives the generated instructions. */
  struct sljit_compiler *compiler;
  /* Base register of the pending store of each temporary register,
     or -1 when the register has no pending store. */
  int store_bases[RECURSE_TMP_REG_COUNT];
  /* Offset of the pending store of each temporary register. */
  int store_offsets[RECURSE_TMP_REG_COUNT];
  /* Registers which hold the values being copied. */
  int tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Where the original value of a temporary register is kept when
     sljit_get_register_index() returns a negative value for this entry. */
  int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Index of the temporary register used by the next copy. */
  int next_tmp_reg;
};

typedef struct delayed_mem_copy_status delayed_mem_copy_status;
2028 
/* Resets a delayed copy sequence: no store is pending and the first
temporary register is used next. The tmp_regs and saved_tmp_regs arrays are
only checked here, so they must be set before the call. */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
int reg;

status->compiler = common->compiler;
status->next_tmp_reg = 0;

reg = 0;
while (reg < RECURSE_TMP_REG_COUNT)
  {
  SLJIT_ASSERT(status->tmp_regs[reg] >= 0);
  SLJIT_ASSERT(sljit_get_register_index(status->saved_tmp_regs[reg]) < 0 || status->tmp_regs[reg] == status->saved_tmp_regs[reg]);

  /* -1 means no pending store. */
  status->store_bases[reg] = -1;
  reg++;
  }
}
2043 
/* Starts the copy of a word from [load_base + load_offset] to
[store_base + store_offset]. Only the load is emitted now; the store is
remembered and emitted when the same temporary register is reused or when
delayed_mem_copy_finish() is called. */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_sw store_offset)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int tmp_reg = status->tmp_regs[slot];

SLJIT_ASSERT(load_base > 0 && store_base > 0);

if (status->store_bases[slot] != -1)
  {
  /* Complete the store which still uses this register. */
  OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], tmp_reg, 0);
  }
else if (sljit_get_register_index(status->saved_tmp_regs[slot]) < 0)
  {
  /* First use of the register: preserve virtual registers. */
  OP1(SLJIT_MOV, status->saved_tmp_regs[slot], 0, tmp_reg, 0);
  }

OP1(SLJIT_MOV, tmp_reg, 0, SLJIT_MEM1(load_base), load_offset);
status->store_offsets[slot] = store_offset;
status->store_bases[slot] = store_base;

/* Continue with the next register, wrapping around at the end. */
status->next_tmp_reg = (slot + 1) % RECURSE_TMP_REG_COUNT;
}
2068 
/* Emits every store that is still pending and restores the carrier
registers that were preserved by delayed_mem_copy_move().

The slots are visited once each, starting with the slot that would be used
next, so the stores are emitted in the order their loads were issued. */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int remaining;

for (remaining = RECURSE_TMP_REG_COUNT; remaining > 0; remaining--)
  {
  if (status->store_bases[slot] != -1)
    {
    const int carrier = status->tmp_regs[slot];
    const int parked = status->saved_tmp_regs[slot];

    OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], carrier, 0);

    /* Restore virtual registers. */
    if (sljit_get_register_index(parked) < 0)
      OP1(SLJIT_MOV, carrier, 0, parked, 0);
    }

  slot = (slot + 1) % RECURSE_TMP_REG_COUNT;
  }
}
2092 
2093 #undef RECURSE_TMP_REG_COUNT
2094 
/* Scans the bytecode range [cc, ccend) and returns the number of machine
words that copy_recurse_data() transfers for it.

The count starts at one (copy_recurse_data() always reserves a word for
recursive_head_ptr) and grows by the private and shared words of every opcode
met during the scan. One extra word each is added for the control head and
for capture_last when they were seen; the start-of-match and mark words are
only added when a quitting verb was seen as well.

Outputs:
  *needs_control_head  TRUE when the control head word is part of the data
  *has_quit            TRUE when PRUNE, SKIP, COMMIT, THEN or one of the
                       *_ARG verbs other than MARK occurs in the range
  *has_accept          TRUE when ACCEPT or ASSERT_ACCEPT occurs in the range */
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
  BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept)
{
PCRE2_SPTR alternative;
int class_size;
int words = 1;
BOOL saw_quit = FALSE;
BOOL saw_accept = FALSE;
BOOL saw_set_som = FALSE;
BOOL saw_mark = FALSE;
BOOL saw_capture_last = FALSE;
BOOL saw_control_head = FALSE;

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
saw_control_head = TRUE;
#endif

/* Calculate the sum of the private machine words. */
while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    saw_set_som = TRUE;
    cc += 1;
    break;

    case OP_RECURSE:
    /* A nested recursion may touch every piece of shared state. */
    if (common->has_set_som)
      saw_set_som = TRUE;
    if (common->mark_ptr != 0)
      saw_mark = TRUE;
    if (common->capture_last_ptr != 0)
      saw_capture_last = TRUE;
    cc += 1 + LINK_SIZE;
    break;

    case OP_KET:
    if (PRIVATE_DATA(cc) != 0)
      {
      words += 1;
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
    words += 1;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    /* Two ovector words, plus one private word for non-optimized brackets. */
    words += (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) ? 3 : 2;
    if (common->capture_last_ptr != 0)
      saw_capture_last = TRUE;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    /* Two ovector words and two private words. */
    words += 4;
    if (common->capture_last_ptr != 0)
      saw_capture_last = TRUE;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      words += 1;
    cc += 1 + LINK_SIZE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc) != 0)
      words += 1;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc) != 0)
      words += 1;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    class_size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
    class_size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif
    /* The word count depends on the iterator that follows the class. */
    if (PRIVATE_DATA(cc) != 0)
      words += get_class_iterator_size(cc + class_size);
    cc += class_size;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    saw_mark = TRUE;
    if (common->control_head_ptr != 0)
      saw_control_head = TRUE;
    if (*cc != OP_MARK)
      saw_quit = TRUE;

    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_SKIP:
    case OP_COMMIT:
    saw_quit = TRUE;
    cc++;
    break;

    case OP_SKIP_ARG:
    saw_quit = TRUE;
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    saw_quit = TRUE;
    saw_control_head = TRUE;
    cc++;
    break;

    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    saw_accept = TRUE;
    cc++;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }
SLJIT_ASSERT(cc == ccend);

if (saw_control_head)
  words += 1;
if (saw_capture_last)
  words += 1;
/* Start-of-match and mark are only saved when a verb can quit the recursion. */
if (saw_quit && saw_set_som)
  words += 1;
if (saw_quit && saw_mark)
  words += 1;

*needs_control_head = saw_control_head;
*has_quit = saw_quit;
*has_accept = saw_accept;
return words;
}
2310 
/* Transfer modes understood by copy_recurse_data(). The data consists of
three word groups: private, shared and kept-shared. */
enum copy_recurse_data_types {
  /* Locals (SLJIT_SP relative) -> save area at STACK_TOP; all three groups. */
  recurse_copy_from_global = 0,
  /* Save area at STACK_TOP -> locals; recursive head and private group only. */
  recurse_copy_private_to_global = 1,
  /* Save area at STACK_TOP -> locals; shared and kept-shared groups. */
  recurse_copy_shared_to_global = 2,
  /* Save area at STACK_TOP -> locals; kept-shared group only. */
  recurse_copy_kept_shared_to_global = 3,
  /* Exchange locals with the save area at TMP2; recursive head, private and
  shared groups. */
  recurse_swap_global = 4
};
2318 
copy_recurse_data(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,int type,int stackptr,int stacktop,BOOL has_quit)2319 static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
2320   int type, int stackptr, int stacktop, BOOL has_quit)
2321 {
2322 delayed_mem_copy_status status;
2323 PCRE2_SPTR alternative;
2324 sljit_sw private_srcw[2];
2325 sljit_sw shared_srcw[3];
2326 sljit_sw kept_shared_srcw[2];
2327 int private_count, shared_count, kept_shared_count;
2328 int from_sp, base_reg, offset, i;
2329 BOOL setsom_found = FALSE;
2330 BOOL setmark_found = FALSE;
2331 BOOL capture_last_found = FALSE;
2332 BOOL control_head_found = FALSE;
2333 
2334 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2335 SLJIT_ASSERT(common->control_head_ptr != 0);
2336 control_head_found = TRUE;
2337 #endif
2338 
2339 switch (type)
2340   {
2341   case recurse_copy_from_global:
2342   from_sp = TRUE;
2343   base_reg = STACK_TOP;
2344   break;
2345 
2346   case recurse_copy_private_to_global:
2347   case recurse_copy_shared_to_global:
2348   case recurse_copy_kept_shared_to_global:
2349   from_sp = FALSE;
2350   base_reg = STACK_TOP;
2351   break;
2352 
2353   default:
2354   SLJIT_ASSERT(type == recurse_swap_global);
2355   from_sp = FALSE;
2356   base_reg = TMP2;
2357   break;
2358   }
2359 
2360 stackptr = STACK(stackptr);
2361 stacktop = STACK(stacktop);
2362 
2363 status.tmp_regs[0] = TMP1;
2364 status.saved_tmp_regs[0] = TMP1;
2365 
2366 if (base_reg != TMP2)
2367   {
2368   status.tmp_regs[1] = TMP2;
2369   status.saved_tmp_regs[1] = TMP2;
2370   }
2371 else
2372   {
2373   status.saved_tmp_regs[1] = RETURN_ADDR;
2374   if (sljit_get_register_index(RETURN_ADDR) == -1)
2375     status.tmp_regs[1] = STR_PTR;
2376   else
2377     status.tmp_regs[1] = RETURN_ADDR;
2378   }
2379 
2380 status.saved_tmp_regs[2] = TMP3;
2381 if (sljit_get_register_index(TMP3) == -1)
2382   status.tmp_regs[2] = STR_END;
2383 else
2384   status.tmp_regs[2] = TMP3;
2385 
2386 delayed_mem_copy_init(&status, common);
2387 
2388 if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2389   {
2390   SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2391 
2392   if (!from_sp)
2393     delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);
2394 
2395   if (from_sp || type == recurse_swap_global)
2396     delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
2397   }
2398 
2399 stackptr += sizeof(sljit_sw);
2400 
2401 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2402 if (type != recurse_copy_shared_to_global)
2403   {
2404   if (!from_sp)
2405     delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);
2406 
2407   if (from_sp || type == recurse_swap_global)
2408     delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
2409   }
2410 
2411 stackptr += sizeof(sljit_sw);
2412 #endif
2413 
2414 while (cc < ccend)
2415   {
2416   private_count = 0;
2417   shared_count = 0;
2418   kept_shared_count = 0;
2419 
2420   switch(*cc)
2421     {
2422     case OP_SET_SOM:
2423     SLJIT_ASSERT(common->has_set_som);
2424     if (has_quit && !setsom_found)
2425       {
2426       kept_shared_srcw[0] = OVECTOR(0);
2427       kept_shared_count = 1;
2428       setsom_found = TRUE;
2429       }
2430     cc += 1;
2431     break;
2432 
2433     case OP_RECURSE:
2434     if (has_quit)
2435       {
2436       if (common->has_set_som && !setsom_found)
2437         {
2438         kept_shared_srcw[0] = OVECTOR(0);
2439         kept_shared_count = 1;
2440         setsom_found = TRUE;
2441         }
2442       if (common->mark_ptr != 0 && !setmark_found)
2443         {
2444         kept_shared_srcw[kept_shared_count] = common->mark_ptr;
2445         kept_shared_count++;
2446         setmark_found = TRUE;
2447         }
2448       }
2449     if (common->capture_last_ptr != 0 && !capture_last_found)
2450       {
2451       shared_srcw[0] = common->capture_last_ptr;
2452       shared_count = 1;
2453       capture_last_found = TRUE;
2454       }
2455     cc += 1 + LINK_SIZE;
2456     break;
2457 
2458     case OP_KET:
2459     if (PRIVATE_DATA(cc) != 0)
2460       {
2461       private_count = 1;
2462       private_srcw[0] = PRIVATE_DATA(cc);
2463       SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2464       cc += PRIVATE_DATA(cc + 1);
2465       }
2466     cc += 1 + LINK_SIZE;
2467     break;
2468 
2469     case OP_ASSERT:
2470     case OP_ASSERT_NOT:
2471     case OP_ASSERTBACK:
2472     case OP_ASSERTBACK_NOT:
2473     case OP_ONCE:
2474     case OP_SCRIPT_RUN:
2475     case OP_BRAPOS:
2476     case OP_SBRA:
2477     case OP_SBRAPOS:
2478     case OP_SCOND:
2479     private_count = 1;
2480     private_srcw[0] = PRIVATE_DATA(cc);
2481     cc += 1 + LINK_SIZE;
2482     break;
2483 
2484     case OP_CBRA:
2485     case OP_SCBRA:
2486     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2487     shared_srcw[0] = OVECTOR(offset);
2488     shared_srcw[1] = OVECTOR(offset + 1);
2489     shared_count = 2;
2490 
2491     if (common->capture_last_ptr != 0 && !capture_last_found)
2492       {
2493       shared_srcw[2] = common->capture_last_ptr;
2494       shared_count = 3;
2495       capture_last_found = TRUE;
2496       }
2497 
2498     if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
2499       {
2500       private_count = 1;
2501       private_srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
2502       }
2503     cc += 1 + LINK_SIZE + IMM2_SIZE;
2504     break;
2505 
2506     case OP_CBRAPOS:
2507     case OP_SCBRAPOS:
2508     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2509     shared_srcw[0] = OVECTOR(offset);
2510     shared_srcw[1] = OVECTOR(offset + 1);
2511     shared_count = 2;
2512 
2513     if (common->capture_last_ptr != 0 && !capture_last_found)
2514       {
2515       shared_srcw[2] = common->capture_last_ptr;
2516       shared_count = 3;
2517       capture_last_found = TRUE;
2518       }
2519 
2520     private_count = 2;
2521     private_srcw[0] = PRIVATE_DATA(cc);
2522     private_srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
2523     cc += 1 + LINK_SIZE + IMM2_SIZE;
2524     break;
2525 
2526     case OP_COND:
2527     /* Might be a hidden SCOND. */
2528     alternative = cc + GET(cc, 1);
2529     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2530       {
2531       private_count = 1;
2532       private_srcw[0] = PRIVATE_DATA(cc);
2533       }
2534     cc += 1 + LINK_SIZE;
2535     break;
2536 
2537     CASE_ITERATOR_PRIVATE_DATA_1
2538     if (PRIVATE_DATA(cc))
2539       {
2540       private_count = 1;
2541       private_srcw[0] = PRIVATE_DATA(cc);
2542       }
2543     cc += 2;
2544 #ifdef SUPPORT_UNICODE
2545     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2546 #endif
2547     break;
2548 
2549     CASE_ITERATOR_PRIVATE_DATA_2A
2550     if (PRIVATE_DATA(cc))
2551       {
2552       private_count = 2;
2553       private_srcw[0] = PRIVATE_DATA(cc);
2554       private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2555       }
2556     cc += 2;
2557 #ifdef SUPPORT_UNICODE
2558     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2559 #endif
2560     break;
2561 
2562     CASE_ITERATOR_PRIVATE_DATA_2B
2563     if (PRIVATE_DATA(cc))
2564       {
2565       private_count = 2;
2566       private_srcw[0] = PRIVATE_DATA(cc);
2567       private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2568       }
2569     cc += 2 + IMM2_SIZE;
2570 #ifdef SUPPORT_UNICODE
2571     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2572 #endif
2573     break;
2574 
2575     CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2576     if (PRIVATE_DATA(cc))
2577       {
2578       private_count = 1;
2579       private_srcw[0] = PRIVATE_DATA(cc);
2580       }
2581     cc += 1;
2582     break;
2583 
2584     CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2585     if (PRIVATE_DATA(cc))
2586       {
2587       private_count = 2;
2588       private_srcw[0] = PRIVATE_DATA(cc);
2589       private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2590       }
2591     cc += 1;
2592     break;
2593 
2594     CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2595     if (PRIVATE_DATA(cc))
2596       {
2597       private_count = 2;
2598       private_srcw[0] = PRIVATE_DATA(cc);
2599       private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2600       }
2601     cc += 1 + IMM2_SIZE;
2602     break;
2603 
2604     case OP_CLASS:
2605     case OP_NCLASS:
2606 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
2607     case OP_XCLASS:
2608     i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2609 #else
2610     i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2611 #endif
2612     if (PRIVATE_DATA(cc) != 0)
2613       switch(get_class_iterator_size(cc + i))
2614         {
2615         case 1:
2616         private_count = 1;
2617         private_srcw[0] = PRIVATE_DATA(cc);
2618         break;
2619 
2620         case 2:
2621         private_count = 2;
2622         private_srcw[0] = PRIVATE_DATA(cc);
2623         private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2624         break;
2625 
2626         default:
2627         SLJIT_UNREACHABLE();
2628         break;
2629         }
2630     cc += i;
2631     break;
2632 
2633     case OP_MARK:
2634     case OP_COMMIT_ARG:
2635     case OP_PRUNE_ARG:
2636     case OP_THEN_ARG:
2637     SLJIT_ASSERT(common->mark_ptr != 0);
2638     if (has_quit && !setmark_found)
2639       {
2640       kept_shared_srcw[0] = common->mark_ptr;
2641       kept_shared_count = 1;
2642       setmark_found = TRUE;
2643       }
2644     if (common->control_head_ptr != 0 && !control_head_found)
2645       {
2646       shared_srcw[0] = common->control_head_ptr;
2647       shared_count = 1;
2648       control_head_found = TRUE;
2649       }
2650     cc += 1 + 2 + cc[1];
2651     break;
2652 
2653     case OP_THEN:
2654     SLJIT_ASSERT(common->control_head_ptr != 0);
2655     if (!control_head_found)
2656       {
2657       shared_srcw[0] = common->control_head_ptr;
2658       shared_count = 1;
2659       control_head_found = TRUE;
2660       }
2661     cc++;
2662     break;
2663 
2664     default:
2665     cc = next_opcode(common, cc);
2666     SLJIT_ASSERT(cc != NULL);
2667     break;
2668     }
2669 
2670   if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2671     {
2672     SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2673 
2674     for (i = 0; i < private_count; i++)
2675       {
2676       SLJIT_ASSERT(private_srcw[i] != 0);
2677 
2678       if (!from_sp)
2679         delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);
2680 
2681       if (from_sp || type == recurse_swap_global)
2682         delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);
2683 
2684       stackptr += sizeof(sljit_sw);
2685       }
2686     }
2687   else
2688     stackptr += sizeof(sljit_sw) * private_count;
2689 
2690   if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
2691     {
2692     SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
2693 
2694     for (i = 0; i < shared_count; i++)
2695       {
2696       SLJIT_ASSERT(shared_srcw[i] != 0);
2697 
2698       if (!from_sp)
2699         delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);
2700 
2701       if (from_sp || type == recurse_swap_global)
2702         delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);
2703 
2704       stackptr += sizeof(sljit_sw);
2705       }
2706     }
2707   else
2708     stackptr += sizeof(sljit_sw) * shared_count;
2709 
2710   if (type != recurse_copy_private_to_global && type != recurse_swap_global)
2711     {
2712     SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
2713 
2714     for (i = 0; i < kept_shared_count; i++)
2715       {
2716       SLJIT_ASSERT(kept_shared_srcw[i] != 0);
2717 
2718       if (!from_sp)
2719         delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);
2720 
2721       if (from_sp || type == recurse_swap_global)
2722         delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);
2723 
2724       stackptr += sizeof(sljit_sw);
2725       }
2726     }
2727   else
2728     stackptr += sizeof(sljit_sw) * kept_shared_count;
2729   }
2730 
2731 SLJIT_ASSERT(cc == ccend && stackptr == stacktop);
2732 
2733 delayed_mem_copy_finish(&status);
2734 }
2735 
set_then_offsets(compiler_common * common,PCRE2_SPTR cc,sljit_u8 * current_offset)2736 static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
2737 {
2738 PCRE2_SPTR end = bracketend(cc);
2739 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2740 
2741 /* Assert captures then. */
2742 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2743   current_offset = NULL;
2744 /* Conditional block does not. */
2745 if (*cc == OP_COND || *cc == OP_SCOND)
2746   has_alternatives = FALSE;
2747 
2748 cc = next_opcode(common, cc);
2749 if (has_alternatives)
2750   current_offset = common->then_offsets + (cc - common->start);
2751 
2752 while (cc < end)
2753   {
2754   if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2755     cc = set_then_offsets(common, cc, current_offset);
2756   else
2757     {
2758     if (*cc == OP_ALT && has_alternatives)
2759       current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2760     if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2761       *current_offset = 1;
2762     cc = next_opcode(common, cc);
2763     }
2764   }
2765 
2766 return end;
2767 }
2768 
2769 #undef CASE_ITERATOR_PRIVATE_DATA_1
2770 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2771 #undef CASE_ITERATOR_PRIVATE_DATA_2B
2772 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2773 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2774 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2775 
is_powerof2(unsigned int value)2776 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2777 {
2778 return (value & (value - 1)) == 0;
2779 }
2780 
set_jumps(jump_list * list,struct sljit_label * label)2781 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2782 {
2783 while (list)
2784   {
2785   /* sljit_set_label is clever enough to do nothing
2786   if either the jump or the label is NULL. */
2787   SET_LABEL(list->jump, label);
2788   list = list->next;
2789   }
2790 }
2791 
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)2792 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2793 {
2794 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2795 if (list_item)
2796   {
2797   list_item->next = *list;
2798   list_item->jump = jump;
2799   *list = list_item;
2800   }
2801 }
2802 
/* Registers an out-of-line stub: start is the jump that enters the stub and
a label emitted at the current position is recorded as the point where
execution continues. The stub code itself is generated by flush_stubs().
Nothing is recorded (and no label is emitted) when the node allocation
fails. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (stub == NULL)
  return;

stub->start = start;
stub->quit = LABEL();

/* Push onto the front of the pending stub list. */
stub->next = common->stubs;
common->stubs = stub;
}
2816 
/* Emits the code of every stub registered with add_stub() and empties the
stub list. Each stub is the landing point of its start jump, performs a fast
call that is collected in common->stackalloc, and then jumps back to the
label recorded when the stub was registered. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
2831 
/* Emits a label at the current position and records it together with
update_addr at the front of common->label_addrs. Nothing is emitted or
recorded when the node allocation fails. */
static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
{
DEFINE_COMPILER;
label_addr_list *entry = sljit_alloc_memory(compiler, sizeof(label_addr_list));

if (entry != NULL)
  {
  entry->label = LABEL();
  entry->update_addr = update_addr;
  entry->next = common->label_addrs;
  common->label_addrs = entry;
  }
}
2845 
count_match(compiler_common * common)2846 static SLJIT_INLINE void count_match(compiler_common *common)
2847 {
2848 DEFINE_COMPILER;
2849 
2850 OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2851 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2852 }
2853 
allocate_stack(compiler_common * common,int size)2854 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2855 {
2856 /* May destroy all locals and registers except TMP2. */
2857 DEFINE_COMPILER;
2858 
2859 SLJIT_ASSERT(size > 0);
2860 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2861 #ifdef DESTROY_REGISTERS
2862 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2863 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2864 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2865 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2866 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2867 #endif
2868 add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
2869 }
2870 
free_stack(compiler_common * common,int size)2871 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2872 {
2873 DEFINE_COMPILER;
2874 
2875 SLJIT_ASSERT(size > 0);
2876 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2877 }
2878 
/* Allocates size bytes of data that is tracked through
common->read_only_data_head.

One extra sljit_uw is allocated in front of the returned area; it stores the
previous list head, so all allocations form a singly linked list. Returns
NULL when the compiler is already in an error state, or when the allocation
fails (in which case the compiler memory error is set). */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *chunk;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

chunk = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
if (SLJIT_UNLIKELY(chunk == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

/* Link the new chunk in front of the list; the payload follows the link. */
*(void**)chunk = common->read_only_data_head;
common->read_only_data_head = (void *)chunk;
return chunk + 1;
}
2898 
reset_ovector(compiler_common * common,int length)2899 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2900 {
2901 DEFINE_COMPILER;
2902 struct sljit_label *loop;
2903 sljit_s32 i;
2904 
2905 /* At this point we can freely use all temporary registers. */
2906 SLJIT_ASSERT(length > 1);
2907 /* TMP1 returns with begin - 1. */
2908 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2909 if (length < 8)
2910   {
2911   for (i = 1; i < length; i++)
2912     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2913   }
2914 else
2915   {
2916   if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
2917     {
2918     GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2919     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2920     loop = LABEL();
2921     sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
2922     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2923     JUMPTO(SLJIT_NOT_ZERO, loop);
2924     }
2925   else
2926     {
2927     GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
2928     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2929     loop = LABEL();
2930     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
2931     OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
2932     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2933     JUMPTO(SLJIT_NOT_ZERO, loop);
2934     }
2935   }
2936 }
2937 
reset_fast_fail(compiler_common * common)2938 static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
2939 {
2940 DEFINE_COMPILER;
2941 sljit_s32 i;
2942 
2943 SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
2944 
2945 OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2946 for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
2947   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
2948 }
2949 
do_reset_match(compiler_common * common,int length)2950 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2951 {
2952 DEFINE_COMPILER;
2953 struct sljit_label *loop;
2954 int i;
2955 
2956 SLJIT_ASSERT(length > 1);
2957 /* OVECTOR(1) contains the "string begin - 1" constant. */
2958 if (length > 2)
2959   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2960 if (length < 8)
2961   {
2962   for (i = 2; i < length; i++)
2963     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2964   }
2965 else
2966   {
2967   if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
2968     {
2969     GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2970     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2971     loop = LABEL();
2972     sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
2973     OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2974     JUMPTO(SLJIT_NOT_ZERO, loop);
2975     }
2976   else
2977     {
2978     GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
2979     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2980     loop = LABEL();
2981     OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
2982     OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
2983     OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2984     JUMPTO(SLJIT_NOT_ZERO, loop);
2985     }
2986   }
2987 
2988 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2989 if (common->mark_ptr != 0)
2990   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2991 if (common->control_head_ptr != 0)
2992   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2993 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2994 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2995 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
2996 }
2997 
do_search_mark(sljit_sw * current,PCRE2_SPTR skip_arg)2998 static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
2999 {
3000 while (current != NULL)
3001   {
3002   switch (current[1])
3003     {
3004     case type_then_trap:
3005     break;
3006 
3007     case type_mark:
3008     if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
3009       return current[3];
3010     break;
3011 
3012     default:
3013     SLJIT_UNREACHABLE();
3014     break;
3015     }
3016   SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
3017   current = (sljit_sw*)current[0];
3018   }
3019 return 0;
3020 }
3021 
copy_ovector(compiler_common * common,int topbracket)3022 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
3023 {
3024 DEFINE_COMPILER;
3025 struct sljit_label *loop;
3026 BOOL has_pre;
3027 
3028 /* At this point we can freely use all registers. */
3029 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3030 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
3031 
3032 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
3033 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3034 if (common->mark_ptr != 0)
3035   OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
3036 OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
3037 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
3038 if (common->mark_ptr != 0)
3039   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
3040 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
3041   SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
3042 
3043 has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
3044 
3045 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
3046 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
3047 
3048 loop = LABEL();
3049 
3050 if (has_pre)
3051   sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
3052 else
3053   {
3054   OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
3055   OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
3056   }
3057 
3058 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
3059 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
3060 /* Copy the integer value to the output buffer */
3061 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3062 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
3063 #endif
3064 
3065 SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
3066 OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
3067 
3068 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3069 JUMPTO(SLJIT_NOT_ZERO, loop);
3070 
3071 /* Calculate the return value, which is the maximum ovector value. */
3072 if (topbracket > 1)
3073   {
3074   if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
3075     {
3076     GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
3077     OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3078 
3079     /* OVECTOR(0) is never equal to SLJIT_S2. */
3080     loop = LABEL();
3081     sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
3082     OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3083     CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3084     OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3085     }
3086   else
3087     {
3088     GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
3089     OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3090 
3091     /* OVECTOR(0) is never equal to SLJIT_S2. */
3092     loop = LABEL();
3093     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
3094     OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
3095     OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3096     CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3097     OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3098     }
3099   }
3100 else
3101   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
3102 }
3103 
return_with_partial_match(compiler_common * common,struct sljit_label * quit)3104 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
3105 {
3106 DEFINE_COMPILER;
3107 sljit_s32 mov_opcode;
3108 
3109 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
3110 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
3111   && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));
3112 
3113 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
3114 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
3115   common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
3116 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);
3117 
3118 /* Store match begin and end. */
3119 OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
3120 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
3121 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, match_data));
3122 
3123 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
3124 
3125 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
3126 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3127 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
3128 #endif
3129 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);
3130 
3131 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
3132 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3133 OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
3134 #endif
3135 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);
3136 
3137 JUMPTO(SLJIT_JUMP, quit);
3138 }
3139 
check_start_used_ptr(compiler_common * common)3140 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
3141 {
3142 /* May destroy TMP1. */
3143 DEFINE_COMPILER;
3144 struct sljit_jump *jump;
3145 
3146 if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3147   {
3148   /* The value of -1 must be kept for start_used_ptr! */
3149   OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
3150   /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
3151   is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
3152   jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
3153   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3154   JUMPHERE(jump);
3155   }
3156 else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
3157   {
3158   jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3159   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3160   JUMPHERE(jump);
3161   }
3162 }
3163 
char_has_othercase(compiler_common * common,PCRE2_SPTR cc)3164 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
3165 {
3166 /* Detects if the character has an othercase. */
3167 unsigned int c;
3168 
3169 #ifdef SUPPORT_UNICODE
3170 if (common->utf)
3171   {
3172   GETCHAR(c, cc);
3173   if (c > 127)
3174     {
3175     return c != UCD_OTHERCASE(c);
3176     }
3177 #if PCRE2_CODE_UNIT_WIDTH != 8
3178   return common->fcc[c] != c;
3179 #endif
3180   }
3181 else
3182 #endif
3183   c = *cc;
3184 return MAX_255(c) ? common->fcc[c] != c : FALSE;
3185 }
3186 
char_othercase(compiler_common * common,unsigned int c)3187 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
3188 {
3189 /* Returns with the othercase. */
3190 #ifdef SUPPORT_UNICODE
3191 if (common->utf && c > 127)
3192   {
3193   return UCD_OTHERCASE(c);
3194   }
3195 #endif
3196 return TABLE_GET(c, common->fcc, c);
3197 }
3198 
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
/* Detects whether the character at cc and its other case differ in exactly
one bit. Returns 0 when they do not. Otherwise the result is
(index << 8) | mask, where mask is the differing bit reduced to at most eight
bits and index selects the position the mask applies to, as computed by the
width specific code below. The character must have an other case. */
unsigned int c, oc, bit;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int n;
#endif

#ifdef SUPPORT_UNICODE
if (common->utf)
  {
  GETCHAR(c, cc);
  oc = (c <= 127) ? common->fcc[c] : UCD_OTHERCASE(c);
  }
else
#endif
  {
  c = *cc;
  oc = TABLE_GET(c, common->fcc, c);
  }

SLJIT_ASSERT(c != oc);

bit = c ^ oc;

/* Optimized for English alphabet: index 0, mask 0x20. */
if (bit == 0x20 && c <= 127)
  return 0x20;

/* Since c != oc, they must have at least 1 bit difference. */
if (!is_powerof2(bit))
  return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
if (common->utf && c > 127)
  {
  /* Start from the number of extra code units and step back one code unit
  for every six low-order bits which are not affected. */
  for (n = GET_EXTRALEN(*cc); (bit & 0x3f) == 0; n--)
    bit >>= 6;
  return (n << 8) | bit;
  }
#endif /* SUPPORT_UNICODE */

/* Index 0. */
return bit;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
if (common->utf && c > 65535)
  {
  /* A difference in the low ten bits is reported with index 2 or 3; a
  higher one is shifted down and reported with index 0 or 1 below. */
  if (bit < (1u << 10))
    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
  bit >>= 10;
  }
#endif /* SUPPORT_UNICODE */

if (bit < 256)
  return bit;
return (1u << 8) | (bit >> 8);

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
3270 
static void check_partial(compiler_common *common, BOOL force)
{
/* Emits the code which records that a partial match has occurred. Nothing
is emitted in complete mode. Unless 'force' is set, the generated code first
skips the action when start_used_ptr >= STR_PTR; with 'force' in soft mode it
is skipped only when start_used_ptr is -1. The action is clearing hit_start
in soft mode and jumping to the partial match handler otherwise. Does not
modify registers. */
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;
const BOOL soft = (common->mode == PCRE2_JIT_PARTIAL_SOFT);

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

if (!force)
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
else if (soft)
  skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);

if (soft)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  }
else if (common->partialmatchlabel != NULL)
  {
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
else
  {
  /* The handler has no label yet: remember the jump for later. */
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  }

if (skip != NULL)
  JUMPHERE(skip);
}
3300 
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Emits an end of subject check. In complete mode a jump is added to
'end_reached' when STR_PTR >= STR_END. In the partial modes the generated
code continues when STR_PTR < STR_END; otherwise it takes 'end_reached' if
start_used_ptr >= STR_PTR, and if not, it either clears hit_start and takes
'end_reached' (soft mode) or jumps to the partial match handler (hard mode).
Does not affect registers. Usually used in a tight spot. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

/* This test is the same for both partial matching modes. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  {
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
else
  {
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  }

JUMPHERE(not_at_end);
}
3330 
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
/* Emits an end of subject check which backtracks at the end of the subject.
In complete mode a jump is added to 'backtracks' when STR_PTR >= STR_END. In
the partial modes the generated code continues when STR_PTR < STR_END;
otherwise it backtracks if start_used_ptr >= STR_PTR, and if not, a partial
match is signalled: soft mode clears hit_start and backtracks, hard mode
jumps to the partial match handler. */
DEFINE_COMPILER;
struct sljit_jump *has_input;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
has_input = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode != PCRE2_JIT_PARTIAL_SOFT)
  {
  /* Use the handler label when it exists, otherwise queue the jump. */
  if (common->partialmatchlabel == NULL)
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  else
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }

JUMPHERE(has_input);
}
3359 
static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
{
/* Emits the code which reads the character at STR_PTR into TMP1 and keeps
STR_PTR. Does not check STR_END. TMP2, dst, RETURN_ADDR destroyed.

max:        when it is below the first value which needs more than one code
            unit, only the first code unit is loaded.
dst, dstw:  operand used to save STR_PTR while a decoding helper is called.
backtracks: optional list which receives a jump for invalid characters when
            invalid UTF handling is enabled. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *done;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (!common->utf || max < 128)
  return;

/* Single byte characters need no further work. */
done = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);

/* The helper is entered with STR_PTR after the first byte, so STR_PTR is
saved before the call and restored afterwards. */
OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
if (common->invalid_utf)
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
else
  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);

if (common->invalid_utf && backtracks != NULL)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
JUMPHERE(done);
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (!common->utf || max < 0xd800)
  return;

OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

if (!common->invalid_utf)
  {
  /* Anything which is not a high surrogate is already complete. */
  done = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
  /* TMP2 contains the high surrogate. Combine it with the next code unit. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  }
else
  {
  /* Code units outside of the surrogate range are already complete. */
  done = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  }

JUMPHERE(done);
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (!common->invalid_utf)
  return;

if (backtracks == NULL)
  {
  /* No list to jump to: replace the value by INVALID_UTF_CHAR instead. */
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  }
else
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3434 
static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
{
/* Emits the code which reads the character before STR_PTR into TMP1
without moving STR_PTR. TMP2 must contain the start of the subject buffer.
Affects TMP1, TMP2, and RETURN_ADDR.

max:        when it is below the first value which needs more than one code
            unit, only the preceding code unit is loaded.
backtracks: list which receives a jump for invalid characters when invalid
            UTF handling is enabled. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *done;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (!common->utf || max < 128)
  return;

/* Single byte characters need no further work. */
done = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
if (!common->invalid_utf)
  add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
else
  {
  add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  }
JUMPHERE(done);
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (!common->utf || max < 0xd800)
  return;

if (!common->invalid_utf)
  {
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  /* Anything which is not a low surrogate is already complete. */
  done = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
  /* TMP2 contains the low surrogate. Combine it with the code unit before. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  }
else
  {
  done = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
  add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  }
JUMPHERE(done);
#elif PCRE2_CODE_UNIT_WIDTH == 32
/* NOTE(review): unlike the 8 and 16 bit variants above, 'backtracks' is
passed on without a NULL check here - confirm that callers never pass NULL
when invalid UTF handling is enabled. */
if (common->invalid_utf)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3498 
/* Option bits for read_char(). */

/* STR_PTR is moved past the whole character even if its value is outside of
the requested range and is therefore not decoded. */
#define READ_CHAR_UPDATE_STR_PTR (1 << 0)
/* With invalid UTF handling, the newline variant of the decoding helper is
called instead of the generic one. */
#define READ_CHAR_UTF8_NEWLINE (1 << 1)
#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
/* The invalid UTF decoding path is not used even if invalid UTF handling is
enabled. */
#define READ_CHAR_VALID_UTF (1 << 2)
3503 
static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
  jump_list **backtracks, sljit_u32 options)
{
/* Emits the code which reads the precise value of a character into TMP1, if
the character is between min and max (c >= min && c <= max). Otherwise it
returns with a value outside the range. STR_PTR is advanced. Does not check
STR_END.

backtracks: optional list which receives a jump for invalid characters when
            invalid UTF handling is enabled.
options:    combination of the READ_CHAR_* bits. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *skip;
/* Set when STR_PTR must end up after the character in every case. */
const BOOL update_str_ptr = (options & READ_CHAR_UPDATE_STR_PTR) != 0;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *skip_decode;
#endif

SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(options);
SLJIT_ASSERT(min <= max);

/* The first code unit is always consumed. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (!common->utf)
  return;

/* Only single byte values are interesting and STR_PTR need not be exact. */
if (max < 128 && !update_str_ptr)
  return;

if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
  {
  /* Every multi byte sequence is decoded and validated by a helper. */
  skip = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);

  if (options & READ_CHAR_UTF8_NEWLINE)
    add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
  else
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));

  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  JUMPHERE(skip);
  return;
  }

skip = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
if (min >= 0x10000)
  {
  /* Only four byte sequences are decoded. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
  if (update_str_ptr)
    OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  skip_decode = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
  if (!update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  JUMPHERE(skip_decode);
  /* The length taken from utf8_table4 is applied on both paths. */
  if (update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
  }
else if (min >= 0x800 && max <= 0xffff)
  {
  /* Only three byte sequences are decoded. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
  if (update_str_ptr)
    OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  skip_decode = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  if (!update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  JUMPHERE(skip_decode);
  if (update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
  }
else if (max >= 0x800)
  {
  /* The range is too wide for a specialized decoder. */
  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  }
else if (max < 128)
  {
  /* The value is not needed, only the remaining bytes are skipped. */
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  }
else
  {
  /* Only two byte sequences are decoded. */
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  if (update_str_ptr)
    OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  else
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  if (update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
  }
JUMPHERE(skip);
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (!common->utf)
  return;

/* No surrogate can be in range and STR_PTR need not be exact. */
if (max < 0xd800 && !update_str_ptr)
  return;

if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
  {
  /* Surrogates are decoded and validated by a helper. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  skip = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);

  if (options & READ_CHAR_UTF8_NEWLINE)
    add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
  else
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));

  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  JUMPHERE(skip);
  return;
  }

if (max >= 0x10000)
  {
  /* Full decoding of surrogate pairs. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  skip = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
  /* TMP2 contains the high surrogate. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  JUMPHERE(skip);
  return;
  }

/* Skip low surrogate if necessary. */
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && sljit_get_register_index(RETURN_ADDR) >= 0)
  {
  /* Branch free form: the new values are selected by conditional moves. */
  if (update_str_ptr)
    OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
  if (update_str_ptr)
    CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
  if (max >= 0xd800)
    CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000);
  }
else
  {
  skip = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
  if (update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  /* A high surrogate is replaced by a value which is above max. */
  if (max >= 0xd800)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
  JUMPHERE(skip);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (!common->invalid_utf)
  return;

if (backtracks == NULL)
  {
  /* No list to jump to: replace the value by INVALID_UTF_CHAR instead. */
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  }
else
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3690 
3691 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
3692 
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Tells whether the character codes below 128 are enough to determine a
match. This is the case when the upper half of the 32 byte bitset (bytes 16
to 31) is uniform: every byte is 0xff for nclass, or zero otherwise. */
const sljit_u8 expected = nclass ? 0xff : 0;
int i;

for (i = 16; i < 32; i++)
  {
  if (bitset[i] != expected)
    return FALSE;
  }

return TRUE;
}
3709 
static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Emits the code which reads the precise character type of a character into
TMP1, if the character is less than 128. Otherwise it returns with zero.
STR_PTR is advanced by one code unit. Does not check STR_END. Only used in
UTF mode.

When 'negated' is set, the remaining code units of a multi byte character
are consumed as well: with invalid UTF handling the decoding helper is called
and a jump is added to 'backtracks' for invalid characters, otherwise the
length is taken from utf8_table4. */
DEFINE_COMPILER;
struct sljit_jump *single_byte;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* All values > 127 are zero in ctypes. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (!negated)
  return;

single_byte = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

if (!common->invalid_utf)
  {
  /* Skip the rest of the character. */
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  }
else
  {
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  /* TMP1 was overwritten by the helper: restore the zero type. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  }

JUMPHERE(single_byte);
}
3744 
3745 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
3746 
static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Emits the code which reads the character type into TMP1 and updates
STR_PTR. Characters above 255 have a zero type, since the ctypes array
contains only 256 values. Does not check STR_END for the first code unit.

backtracks: list which receives a jump for invalid characters when invalid
            UTF handling is enabled.
negated:    when set, the whole character is consumed even if its value is
            above 255; when clear, the UTF-8 path only decodes two byte
            sequences, and the UTF-16 path does not look at surrogates. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(negated);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The result of this read may be unused, but saves an "else" part. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (!negated)
    {
    /* Inline decoding of a two byte sequence. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
    /* The first byte must be in the 0xc2 - 0xdf range. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));

    /* ((first - 0xc2) << 6) + second is the character value when the second
    byte is in the 0x80 - 0xbf range. */
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
    /* The second byte must be in the 0x80 - 0xbf range. This has to be
    checked on TMP1 (second byte - 0x80): TMP2 already holds the character
    value, which is at least 0x80 for every well formed sequence. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else if (common->invalid_utf)
    {
    /* The helper leaves the character in TMP1. */
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && negated)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && negated)
  {
  /* Skip low surrogate if necessary. */
  if (!common->invalid_utf)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);

    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && sljit_get_register_index(RETURN_ADDR) >= 0)
      {
      /* Branch free form: STR_PTR is advanced by a conditional move. */
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
      CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
      }
    else
      {
      jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      JUMPHERE(jump);
      }
    return;
    }

  /* Invalid UTF handling: a high surrogate must be followed by a low
  surrogate inside the subject, and a lone low surrogate is rejected. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
3865 
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
{
/* Goes one character back. TMP2 must contain the start of
the subject buffer. Affects STR_PTR and TMP1. Does not modify
STR_PTR for invalid character sequences.

When invalid UTF handling is active (common->invalid_utf is set and
must_be_valid is FALSE) and backtracks is not NULL, a jump is added to
the backtracks list which is taken when the preceding character is not
valid. If backtracks is NULL, STR_PTR is simply left unchanged in that
case. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *label;

if (common->utf)
  {
  if (!must_be_valid && common->invalid_utf)
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    /* A byte below 0x80 is a complete character, nothing more to do. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
    /* The helper leaves 1 in TMP1 on success and 0 (with STR_PTR
    restored) when the sequence is invalid. */
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(jump);
    return;
    }

  /* Valid UTF-8: step back while the byte is a continuation
  byte (10xxxxxx). */
  label = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (!must_be_valid && common->invalid_utf)
    {
    /* Code units outside 0xd800-0xdfff are complete characters. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
    /* The helper leaves 1 in TMP1 on success and 0 (with STR_PTR
    restored) when the sequence is invalid. */
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(jump);
    return;
    }

  /* Skip low surrogate if necessary. TMP1 becomes 1 when the code unit
  is in 0xdc00-0xdfff, which is then scaled to a code unit size and
  subtracted from STR_PTR. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && !must_be_valid)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  if (backtracks != NULL)
    {
    /* Values from 0x110000 upwards are rejected before moving. */
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    return;
    }

  /* No backtrack list: move back by one code unit only when the value
  is below 0x110000, otherwise leave STR_PTR unchanged. */
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
  OP2(SLJIT_SHL,  TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */

/* Not every build configuration references these arguments. The markers
are kept after all declarations so that no declaration follows a
statement. */
SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(must_be_valid);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
3951 
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed.

The generated jumps are collected in the backtracks list. When jumpifmatch
is TRUE the jumps are taken if the character is a newline of the given
nltype, otherwise they are taken if it is not. */
DEFINE_COMPILER;
struct sljit_jump *jump;

if (nltype == NLTYPE_ANY)
  {
  /* The shared anynewline helper reports its result in the zero flag;
  sljit_set_current_flags() tells the compiler that the flag is valid
  after the fast call. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  }
else if (nltype == NLTYPE_ANYCRLF)
  {
  if (jumpifmatch)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
    }
  else
    {
    /* CR skips the second test, anything else must be NL. */
    jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
    JUMPHERE(jump);
    }
  }
else
  {
  /* Single code unit fixed newline. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  }
}
3984 
3985 #ifdef SUPPORT_UNICODE
3986 
3987 #if PCRE2_CODE_UNIT_WIDTH == 8
static void do_utfreadchar(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1.

No validity or end-of-buffer checks are emitted here. STR_PTR is advanced
past the continuation bytes which were consumed, and TMP2 is overwritten. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Append the low six bits of the second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. After the shift, 0x800 is bit 0x20 of
the first byte. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. The XOR removes the shifted 0xc0 prefix. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Append the low six bits of the third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* After two shifts, 0x10000 is bit 0x10 of the first byte. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. The XOR removes the shifted 0xe0 prefix. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Four byte sequence. The shifted 0xf0 prefix is removed before the
last six bits are appended. */
JUMPHERE(jump);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4032 
static void do_utfreadtype8(compiler_common *common)
{
/* Fast decoding a UTF-8 character type. TMP2 contains the first byte
of the character (>= 0xc0). Return value in TMP1.

The returned value is the ctypes table entry for characters below 256
and zero for everything else. STR_PTR is advanced past the character
and TMP2 is overwritten. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *compare;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Bit 0x20 of the first byte is set for sequences longer than two bytes. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point. A value above 3 means that
the character is at least 256. */
compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(compare);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* We only have types for characters less than 256. The utf8_table4
entry indexed by (first byte - 0xc0) is added to STR_PTR; presumably
this is the number of additional bytes of the character. */
JUMPHERE(jump);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4068 
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Slow decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1. STR_PTR is
undefined for invalid characters.

INVALID_UTF_CHAR is returned in TMP1 for every rejected sequence: bad
first byte, bad continuation byte, truncated input, surrogate, overlong
three byte form, or a value above 0x10ffff. TMP2 is overwritten.

When conditional move is available, invalid cases are folded into the
value of TMP1 instead of branching, so the matching exit_invalid[] slot
is NULL. */
DEFINE_COMPILER;
sljit_s32 i;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *jump;
struct sljit_jump *buffer_end_close;
struct sljit_label *three_byte_entry;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[11];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);

/* Usually more than 3 characters remained in the subject buffer. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));

/* Not a valid start of a multi-byte sequence, no more bytes read. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);

/* Fewer than three code units remain: use the checked path below. */
buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);

/* Two-byte sequence: only one continuation byte was consumed. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  /* 0x20000 decodes to zero below, which the overlong check rejects. */
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000);
  exit_invalid[2] = NULL;
  }
else
  exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);

three_byte_entry = LABEL();

/* TMP1 is the character value plus 0x20000 here. Reject 0xd800-0xdfff. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800);
  exit_invalid[3] = NULL;
  }
else
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Reject overlong forms: a three byte character must be >= 0x800. */
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  exit_invalid[4] = NULL;
  }
else
  exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  /* Zero fails the range check which follows. */
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0);
  exit_invalid[5] = NULL;
  }
else
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* TMP1 becomes the character value minus 0x10000; the valid range
is 0-0xfffff. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  exit_invalid[6] = NULL;
  }
else
  exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Close to the end of the buffer: every read is checked. */
JUMPHERE(buffer_end_close);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
exit_invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(jump);
exit_invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  exit_invalid[10] = NULL;
  }
else
  exit_invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* One will be subtracted from STR_PTR later. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Four byte sequences are not possible. */
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_entry);

/* Common exit for all invalid cases. Slots which were handled by a
conditional move are NULL. */
exit_invalid_label = LABEL();
for (i = 0; i < 11; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4227 
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Slow decoding a UTF-8 character, specialized for newlines.
TMP1 contains the first byte of the character (>= 0xc0). Return
char value in TMP1.

Only the multi-byte newlines (0x85, 0x2028 and 0x2029) are decoded, and
only when the newline type is NLTYPE_ANY. For anything else the
continuation bytes are skipped and INVALID_UTF_CHAR is returned. TMP2
is overwritten. */
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_label *skip_start;
struct sljit_label *three_byte_exit;
struct sljit_jump *jump[5];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

if (common->nltype != NLTYPE_ANY)
  {
  SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);

  /* All newlines are ascii, just skip intermediate octets. */
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  loop = LABEL();
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  /* STR_PTR must advance on every iteration, otherwise the loop never
  leaves a continuation byte. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
  /* Step back to the first octet which is not a continuation byte. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  JUMPHERE(jump[0]);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
  return;
  }

jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Only these two first bytes can start a multi-byte newline. */
jump[1] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
jump[2] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);

/* TMP2 holds the octet which was read last. */
skip_start = LABEL();
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
jump[3] = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);

/* Skip intermediate octets. */
loop = LABEL();
jump[4] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);

/* The last octet read is not a continuation byte: step back to it. */
JUMPHERE(jump[3]);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

three_byte_exit = LABEL();
JUMPHERE(jump[0]);
JUMPHERE(jump[4]);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Two byte long newline: 0x85. */
JUMPHERE(jump[1]);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Three byte long newlines: 0x2028 and 0x2029. */
JUMPHERE(jump[2]);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start);
CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);

/* The result is 0x2000 plus the low six bits of the third octet. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4311 
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Goes one character back.

On entry STR_PTR points to the last byte of the character, TMP1
contains that byte and TMP2 is compared against STR_PTR as the lowest
readable address. On success TMP1 is set to 1 and STR_PTR points to the
first byte of the character. Otherwise TMP1 is set to 0 and STR_PTR is
set to one code unit after its entry value, which undoes the decrement
performed by move_back() before the call. */
DEFINE_COMPILER;
sljit_s32 i;
struct sljit_jump *jump;
struct sljit_jump *buffer_start_close;
struct sljit_label *exit_ok_label;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[7];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Usually at least three bytes precede the last one. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
/* The last byte must be a continuation byte. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);

/* Two-byte sequence. */
buffer_start_close = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(jump);
/* The unsigned compare accepts only -0x40..-1, which is a continuation
byte (0x80-0xbf) before the subtraction above. */
exit_invalid[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Four-byte sequence. */
JUMPHERE(jump);
/* The byte just inspected must be a continuation byte. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);

/* The first byte must be in the 0xf0-0xf4 range. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);

exit_ok_label = LABEL();
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Close to the start of the buffer: every read is checked. */
/* Two-byte sequence. */
JUMPHERE(buffer_start_close);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

exit_invalid[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, exit_ok_label);

/* Three-byte sequence. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
exit_invalid[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
exit_invalid[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, exit_ok_label);

/* Four-byte sequences are not possible. */

/* Invalid exit when STR_PTR is two code units below its entry value. */
exit_invalid_label = LABEL();
sljit_set_label(exit_invalid[5], exit_invalid_label);
sljit_set_label(exit_invalid[6], exit_invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(exit_invalid[4]);
/* -2 + 4 = 2 */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Invalid exit when STR_PTR is three code units below its entry value. */
exit_invalid_label = LABEL();
for (i = 0; i < 4; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4407 
static void do_utfpeakcharback(compiler_common *common)
{
/* Peek a character back.

Decodes the multi-byte UTF-8 character which ends just before STR_PTR
and returns its value in TMP1. No validity checks are emitted. STR_PTR
is not modified, TMP2 is overwritten. */
DEFINE_COMPILER;
struct sljit_jump *jump[2];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* A first byte in 0xc0-0xdf at offset -2 means a two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);

/* A first byte in 0xe0-0xef at offset -3 means a three byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
jump[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);

/* Four byte sequence: TMP1 is turned into the payload of the byte at
offset -3 and combined with the first byte at offset -4. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the payload of the byte at offset -2. */
JUMPHERE(jump[1]);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the payload of the byte at offset -1. */
JUMPHERE(jump[0]);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4444 
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Peek a character back.

Checked variant. On entry TMP1 contains the byte just before STR_PTR and
TMP2 is used as the lowest readable address. The character value is
returned in TMP1, or INVALID_UTF_CHAR if the preceding bytes do not form
an accepted sequence. STR_PTR is not modified, TMP2 is overwritten.

When conditional move is available, some invalid cases are folded into
the value of TMP1 instead of branching, so the matching exit_invalid[]
slot is NULL. */
DEFINE_COMPILER;
sljit_s32 i;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *jump[2];
struct sljit_label *two_byte_entry;
struct sljit_label *three_byte_entry;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[8];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* jump[0] is taken when fewer than four code units are available. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
/* The last byte must be a continuation byte. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);

two_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
/* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump[1]);
/* The byte at offset -2 must be a continuation byte. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);

three_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Reject 0xd800-0xdfff. With conditional move the value becomes zero
after the addition below, which the next check rejects. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800);
  exit_invalid[2] = NULL;
  }
else
  exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);

/* Reject overlong forms: a three byte character must be >= 0x800. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  exit_invalid[3] = NULL;
  }
else
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump[1]);
/* The byte at offset -3 must be a continuation byte. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
/* ADD is used instead of OR because of the SUB 0x10000 above. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

/* TMP1 is the character value minus 0x10000; the valid range
is 0-0xfffff. */
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  exit_invalid[5] = NULL;
  }
else
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Fewer than four code units are available: retry with a limit
of three. */
JUMPHERE(jump[0]);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Fewer than three code units are available: only a two byte
sequence is possible. */
JUMPHERE(jump[0]);
exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

/* Common exit for all invalid cases. Slots which were handled by a
conditional move are NULL. */
exit_invalid_label = LABEL();
for (i = 0; i < 8; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4576 
4577 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
4578 
4579 #if PCRE2_CODE_UNIT_WIDTH == 16
4580 
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Slow decoding a UTF-16 character. TMP1 contains the first half
of the character (>= 0xd800). Return char value in TMP1. STR_PTR is
undefined for invalid characters.

INVALID_UTF_CHAR is returned when the first half is not a high
surrogate, when the input ends after it, or when the next code unit is
not in the 0xdc00-0xdfff range. TMP2 is overwritten. */
DEFINE_COMPILER;
struct sljit_jump *exit_invalid[3];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* TMP2 contains the high surrogate. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* Read the second half while the first one is moved into position. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Common exit for all invalid cases. */
JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
JUMPHERE(exit_invalid[2]);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4612 
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Slow decoding a UTF-16 character, specialized for newlines.
TMP1 contains the first half of the character (>= 0xd800). Return
char value in TMP1.

The character value itself is never computed here. When TMP1 is below
0xdc00 and another code unit is available, 0x10000 is returned and
STR_PTR is advanced by one code unit if that unit is in the
0xdc00-0xdfff range. INVALID_UTF_CHAR is returned otherwise. TMP2 is
overwritten. */

DEFINE_COMPILER;
struct sljit_jump *exit_invalid[2];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* TMP2 contains the high surrogate. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);

/* TMP2 becomes 1 if the next code unit is in 0xdc00-0xdfff and 0
otherwise; it is then scaled to a code unit size. */
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Common exit for all invalid cases. */
JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4644 
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Out-of-line helper that tries to step STR_PTR back over one more
UTF-16 code unit. TMP1 returns 1 when STR_PTR was moved back by one unit
and 0 otherwise; in the 0 case STR_PTR is moved forward by one unit
instead.
NOTE(review): the entry values of TMP1 and TMP2 are set up by the caller,
which is not visible here; presumably TMP1 is the already-read unit minus
0xd800 and TMP2 is the lowest address that may be read - confirm. */
DEFINE_COMPILER;
struct sljit_jump *tmp1_too_small;
struct sljit_jump *at_lower_bound;
struct sljit_jump *bad_previous_unit;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

tmp1_too_small = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
at_lower_bound = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* The unit before STR_PTR must lie in 0xd800..0xdbff. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
bad_previous_unit = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

/* Success: step back one unit and report 1. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Failure: all three early exits land here. */
JUMPHERE(tmp1_too_small);
JUMPHERE(at_lower_bound);
JUMPHERE(bad_previous_unit);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4672 
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Out-of-line helper that peeks at the UTF-16 character ending just
before STR_PTR without moving STR_PTR. TMP1 holds the last code unit on
entry and the decoded character (or INVALID_UTF_CHAR) on exit. TMP2 is
clobbered.
NOTE(review): TMP2 is treated as a lower address bound on entry; the
caller that sets it is not visible here - confirm. */
DEFINE_COMPILER;
struct sljit_jump *single_unit;
struct sljit_jump *not_low_surrogate;
struct sljit_jump *no_room_before;
struct sljit_jump *not_high_surrogate;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Values from 0xe000 upwards are returned unchanged. */
single_unit = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
not_low_surrogate = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
no_room_before = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Combine the unit two positions back (must be 0xd800..0xdbff) with the
low surrogate already in TMP1:
result = 0x10000 + ((high - 0xd800) << 10) + (low - 0xdc00). */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
not_high_surrogate = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

JUMPHERE(single_unit);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Failure path shared by the three validity checks. */
JUMPHERE(not_low_surrogate);
JUMPHERE(no_room_before);
JUMPHERE(not_high_surrogate);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4704 
4705 #endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
4706 
4707 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
4708 #define UCD_BLOCK_MASK 127
4709 #define UCD_BLOCK_SHIFT 7
4710 
static void do_getucd(compiler_common *common)
{
/* Emits the out-of-line helper that walks the two-stage UCD tables for
the character passed in TMP1. On return TMP2 holds the 16-bit value read
from ucd_stage2 (the record index); TMP1 is overwritten with the stage 2
position used for that read. No field of the record itself is loaded. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *within_range;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* The substitute character used below must map to an all-default record. */
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Values above MAX_UTF_CODE_POINT are replaced by UNASSIGNED_UTF_CHAR
  before the table lookup. */
  within_range = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(within_range);
  }
#endif

/* Stage 1: TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT] (16-bit entries). */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));

/* Stage 2 position: TMP1 = (block << UCD_BLOCK_SHIFT) + (char & UCD_BLOCK_MASK). */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

/* TMP2 = ucd_stage2[TMP1]; the index register is shifted left by 1. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4750 
static void do_getucdtype(compiler_common *common)
{
/* Emits the out-of-line helper that looks up the chartype field of the
UCD record for the character passed in TMP1. On return TMP1 holds the
chartype byte and TMP2 holds the byte offset of the record inside
ucd_records (record index * 12). */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *jump;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

/* The multiplication by 12 below depends on the record size. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Values above MAX_UTF_CODE_POINT are replaced by UNASSIGNED_UTF_CHAR
  before the table lookup. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(jump);
  }
#endif

/* Stage 1: TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT] (16-bit entries). */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));

/* Stage 2: TMP2 = ucd_stage2[(block << UCD_BLOCK_SHIFT) + (char & UCD_BLOCK_MASK)]. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

/* TMP2 = record index * 12, computed as (index << 2) + (index << 3).
TMP1 is used as scratch, so the table base is loaded only afterwards. */
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

/* TMP1 = address of the chartype field of record 0, then load the byte
at that address plus the record offset. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 0);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4803 
4804 #endif /* SUPPORT_UNICODE */
4805 
mainloop_entry(compiler_common * common)4806 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
4807 {
/* Emits the code that is entered before each match attempt and returns
the label of the "advance to the next start position" code. Three parts
are emitted in this order:
  1. an optional prologue that stores a limit pointer in the stack slot
     common->match_end_ptr (PCRE2_FIRSTLINE or PCRE2_USE_OFFSET_LIMIT);
  2. an optional out-of-line sequence that steps over the second unit of
     a two-unit newline (newlinecheck);
  3. the advance code at the returned label, which moves STR_PTR forward
     by one character.
The first entry jumps over parts 2 and 3 ("start"), so STR_PTR is only
advanced when control later reaches the returned label. */
4808 DEFINE_COMPILER;
4809 struct sljit_label *mainloop;
4810 struct sljit_label *newlinelabel = NULL;
4811 struct sljit_jump *start;
4812 struct sljit_jump *end = NULL;
4813 struct sljit_jump *end2 = NULL;
4814 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4815 struct sljit_label *loop;
4816 struct sljit_jump *jump;
4817 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
4818 jump_list *newline = NULL;
4819 sljit_u32 overall_options = common->re->overall_options;
4820 BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
4821 BOOL newlinecheck = FALSE;
4822 BOOL readuchar = FALSE;
4823 
/* The separate newline check is only emitted when the pattern has no
explicit CR/LF, PCRE2_FIRSTLINE is off, and the newline convention is
ANY, ANYCRLF or a value above 255. */
4824 if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
4825     && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
4826   newlinecheck = TRUE;
4827 
4828 SLJIT_ASSERT(common->abort_label == NULL);
4829 
4830 if ((overall_options & PCRE2_FIRSTLINE) != 0)
4831   {
4832   /* Search for the end of the first line. */
4833   SLJIT_ASSERT(common->match_end_ptr != 0);
  /* STR_PTR is saved in TMP3 and restored after the scan. */
4834   OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4835 
4836   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4837     {
    /* Fixed newline above 255: compare the units at STR_PTR[-1] and
    STR_PTR[0] with the high and low byte of common->newline. The
    position stored is STR_PTR minus one unit. */
4838     mainloop = LABEL();
4839     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4840     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4841     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4842     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4843     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
4844     CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
4845     JUMPHERE(end);
4846     OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4847     }
4848   else
4849     {
4850     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4851     mainloop = LABEL();
4852     /* Continual stores does not cause data dependency. */
4853     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
4854     read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
4855     check_newlinechar(common, common->nltype, &newline, TRUE);
4856     CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
4857     JUMPHERE(end);
4858     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    /* Jumps collected in "newline" land after the store above, so the
    value written before the last read_char stays in the slot. */
4859     set_jumps(newline, LABEL());
4860     }
4861 
4862   OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4863   }
4864 else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
4865   {
4866   /* Check whether offset limit is set and valid. */
4867   SLJIT_ASSERT(common->match_end_ptr != 0);
4868 
  /* TMP1 = arguments->offset_limit. TMP2 is preset to STR_END, which is
  the value stored when the limit equals PCRE2_UNSET. */
4869   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4870   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
4871   OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
4872   end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
4873   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4874 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
4875   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4876 #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
  /* TMP2 = arguments->begin + limit, replaced by STR_END when larger. */
4877   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4878   OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
4879   end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
4880   OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
4881   JUMPHERE(end2);
  /* A limit below the current STR_PTR aborts with PCRE2_ERROR_NOMATCH. */
4882   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
4883   add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
4884   JUMPHERE(end);
4885   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
4886   }
4887 
/* The first entry skips the advance code; it is bound at the end. */
4888 start = JUMP(SLJIT_JUMP);
4889 
4890 if (newlinecheck)
4891   {
  /* Reached from the advance code when the current unit equals the high
  byte of common->newline: step one unit, and step one more if the next
  unit equals the low byte. "end" and "end2" are reused here and bound
  at the very end of this function. */
4892   newlinelabel = LABEL();
4893   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4894   end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4895   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4896   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
4897   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
4898 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
4899   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4900 #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
4901   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4902   end2 = JUMP(SLJIT_JUMP);
4903   }
4904 
/* This is the label returned to the caller. */
4905 mainloop = LABEL();
4906 
4907 /* Increasing the STR_PTR here requires one less jump in the most common case. */
4908 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4909 if (common->utf && !common->invalid_utf) readuchar = TRUE;
4910 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
4911 if (newlinecheck) readuchar = TRUE;
4912 
/* The current unit is loaded into TMP1 only when a later step uses it. */
4913 if (readuchar)
4914   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4915 
4916 if (newlinecheck)
4917   CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
4918 
4919 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4920 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4921 #if PCRE2_CODE_UNIT_WIDTH == 8
4922 if (common->invalid_utf)
4923   {
4924   /* Skip continuation code units. */
  /* Units in 0x80..0xbf are skipped; the loop stops at STR_END or at the
  first other unit, which is un-consumed by the final subtraction. */
4925   loop = LABEL();
4926   jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4927   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4928   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4929   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
4930   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
4931   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4932   JUMPHERE(jump);
4933   }
4934 else if (common->utf)
4935   {
  /* For a first unit of 0xc0 or above, add the value found in
  utf8_table4 at index (unit - 0xc0) to STR_PTR. */
4936   jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4937   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4938   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4939   JUMPHERE(jump);
4940   }
4941 #elif PCRE2_CODE_UNIT_WIDTH == 16
4942 if (common->invalid_utf)
4943   {
4944   /* Skip continuation code units. */
  /* Same scheme as the 8-bit case, skipping units in 0xdc00..0xdfff. */
4945   loop = LABEL();
4946   jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4947   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4948   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4949   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
4950   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
4951   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4952   JUMPHERE(jump);
4953   }
4954 else if (common->utf)
4955   {
  /* A first unit in 0xd800..0xdbff means one extra unit is skipped; done
  with a conditional move when available, otherwise via a 0/1 flag. */
4956   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4957 
4958   if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
4959     {
4960     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4961     OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
4962     CMOV(SLJIT_LESS, STR_PTR, TMP2, 0);
4963     }
4964   else
4965     {
4966     OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
4967     OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
4968     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4969     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4970     }
4971   }
4972 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
4973 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
/* Target of the initial jump: the first entry continues from here. */
4974 JUMPHERE(start);
4975 
4976 if (newlinecheck)
4977   {
4978   JUMPHERE(end);
4979   JUMPHERE(end2);
4980   }
4981 
4982 return mainloop;
4983 }
4984 
4985 
add_prefix_char(PCRE2_UCHAR chr,fast_forward_char_data * chars,BOOL last)4986 static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
4987 {
4988 sljit_u32 i, count = chars->count;
4989 
4990 if (count == 255)
4991   return;
4992 
4993 if (count == 0)
4994   {
4995   chars->count = 1;
4996   chars->chars[0] = chr;
4997 
4998   if (last)
4999     chars->last_count = 1;
5000   return;
5001   }
5002 
5003 for (i = 0; i < count; i++)
5004   if (chars->chars[i] == chr)
5005     return;
5006 
5007 if (count >= MAX_DIFF_CHARS)
5008   {
5009   chars->count = 255;
5010   return;
5011   }
5012 
5013 chars->chars[count] = chr;
5014 chars->count = count + 1;
5015 
5016 if (last)
5017   chars->last_count++;
5018 }
5019 
scan_prefix(compiler_common * common,PCRE2_SPTR cc,fast_forward_char_data * chars,int max_chars,sljit_u32 * rec_count)5020 static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
5021 {
5022 /* Recursive function, which scans prefix literals. */
/* Walks the compiled pattern starting at cc and fills chars[0..] with the
code units that may occur at each successive position of a match.

  common     compiler state; utf, ctypes and fcc are read here
  cc         current position in the compiled pattern
  chars      one entry per position; count == 255 means "anything"
  max_chars  number of chars[] entries that may still be filled
  rec_count  shared budget, decremented once per loop iteration; the
             scan stops when it reaches zero

Returns the number of positions consumed by this call. Recursive calls
for optional items and alternatives write into the same chars[] entries,
and their return value becomes the new max_chars of the caller. */
5023 BOOL last, any, class, caseless;
5024 int len, repeat, len_save, consumed = 0;
5025 sljit_u32 chr; /* Any unicode character. */
5026 sljit_u8 *bytes, *bytes_end, byte;
5027 PCRE2_SPTR alternative, cc_save, oc;
5028 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5029 PCRE2_UCHAR othercase[4];
5030 #elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
5031 PCRE2_UCHAR othercase[2];
5032 #else
5033 PCRE2_UCHAR othercase[1];
5034 #endif
5035 
5036 repeat = 1;
5037 while (TRUE)
5038   {
5039   if (*rec_count == 0)
5040     return 0;
5041   (*rec_count)--;
5042 
  /* Per-item flags: "last" ends the scan after this item, "any" marks a
  position that can hold anything, "class" selects the bitmap path. */
5043   last = TRUE;
5044   any = FALSE;
5045   class = FALSE;
5046   caseless = FALSE;
5047 
5048   switch (*cc)
5049     {
5050     case OP_CHARI:
5051     caseless = TRUE;
5052     /* Fall through */
5053     case OP_CHAR:
5054     last = FALSE;
5055     cc++;
5056     break;
5057 
5058     case OP_SOD:
5059     case OP_SOM:
5060     case OP_SET_SOM:
5061     case OP_NOT_WORD_BOUNDARY:
5062     case OP_WORD_BOUNDARY:
5063     case OP_EODN:
5064     case OP_EOD:
5065     case OP_CIRC:
5066     case OP_CIRCM:
5067     case OP_DOLL:
5068     case OP_DOLLM:
5069     /* Zero width assertions. */
5070     cc++;
5071     continue;
5072 
    /* Assertion groups are skipped as a whole. */
5073     case OP_ASSERT:
5074     case OP_ASSERT_NOT:
5075     case OP_ASSERTBACK:
5076     case OP_ASSERTBACK_NOT:
5077     cc = bracketend(cc);
5078     continue;
5079 
    /* One-or-more repeats: one character is recorded and, because "last"
    stays TRUE, the scan ends after it. */
5080     case OP_PLUSI:
5081     case OP_MINPLUSI:
5082     case OP_POSPLUSI:
5083     caseless = TRUE;
5084     /* Fall through */
5085     case OP_PLUS:
5086     case OP_MINPLUS:
5087     case OP_POSPLUS:
5088     cc++;
5089     break;
5090 
5091     case OP_EXACTI:
5092     caseless = TRUE;
5093     /* Fall through */
5094     case OP_EXACT:
5095     repeat = GET2(cc, 1);
5096     last = FALSE;
5097     cc += 1 + IMM2_SIZE;
5098     break;
5099 
    /* Optional character: first scan what follows it (the character is
    absent), then fall out of the switch to record the character itself
    at the same position. */
5100     case OP_QUERYI:
5101     case OP_MINQUERYI:
5102     case OP_POSQUERYI:
5103     caseless = TRUE;
5104     /* Fall through */
5105     case OP_QUERY:
5106     case OP_MINQUERY:
5107     case OP_POSQUERY:
5108     len = 1;
5109     cc++;
5110 #ifdef SUPPORT_UNICODE
5111     if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
5112 #endif
5113     max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
5114     if (max_chars == 0)
5115       return consumed;
5116     last = FALSE;
5117     break;
5118 
5119     case OP_KET:
5120     cc += 1 + LINK_SIZE;
5121     continue;
5122 
    /* Reaching OP_ALT means the current branch ended: jump over it. */
5123     case OP_ALT:
5124     cc += GET(cc, 1);
5125     continue;
5126 
    /* Groups: every alternative after the first is scanned recursively
    into the same positions, then the first branch is scanned inline. */
5127     case OP_ONCE:
5128     case OP_BRA:
5129     case OP_BRAPOS:
5130     case OP_CBRA:
5131     case OP_CBRAPOS:
5132     alternative = cc + GET(cc, 1);
5133     while (*alternative == OP_ALT)
5134       {
5135       max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
5136       if (max_chars == 0)
5137         return consumed;
5138       alternative += GET(alternative, 1);
5139       }
5140 
5141     if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
5142       cc += IMM2_SIZE;
5143     cc += 1 + LINK_SIZE;
5144     continue;
5145 
5146     case OP_CLASS:
5147 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5148     if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
5149       return consumed;
5150 #endif
5151     class = TRUE;
5152     break;
5153 
5154     case OP_NCLASS:
5155 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5156     if (common->utf) return consumed;
5157 #endif
5158     class = TRUE;
5159     break;
5160 
5161 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
5162     case OP_XCLASS:
5163 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5164     if (common->utf) return consumed;
5165 #endif
5166     any = TRUE;
5167     cc += GET(cc, 1);
5168     break;
5169 #endif
5170 
5171     case OP_DIGIT:
5172 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5173     if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
5174       return consumed;
5175 #endif
5176     any = TRUE;
5177     cc++;
5178     break;
5179 
5180     case OP_WHITESPACE:
5181 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5182     if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
5183       return consumed;
5184 #endif
5185     any = TRUE;
5186     cc++;
5187     break;
5188 
5189     case OP_WORDCHAR:
5190 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5191     if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
5192       return consumed;
5193 #endif
5194     any = TRUE;
5195     cc++;
5196     break;
5197 
5198     case OP_NOT:
5199     case OP_NOTI:
5200     cc++;
5201     /* Fall through. */
5202     case OP_NOT_DIGIT:
5203     case OP_NOT_WHITESPACE:
5204     case OP_NOT_WORDCHAR:
5205     case OP_ANY:
5206     case OP_ALLANY:
5207 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5208     if (common->utf) return consumed;
5209 #endif
5210     any = TRUE;
5211     cc++;
5212     break;
5213 
5214 #ifdef SUPPORT_UNICODE
5215     case OP_NOTPROP:
5216     case OP_PROP:
5217 #if PCRE2_CODE_UNIT_WIDTH != 32
5218     if (common->utf) return consumed;
5219 #endif
5220     any = TRUE;
5221     cc += 1 + 2;
5222     break;
5223 #endif
5224 
    /* Only the repeat count is taken here; the type opcode that follows
    is handled by the next loop iteration with "repeat" still set. */
5225     case OP_TYPEEXACT:
5226     repeat = GET2(cc, 1);
5227     cc += 1 + IMM2_SIZE;
5228     continue;
5229 
5230     case OP_NOTEXACT:
5231     case OP_NOTEXACTI:
5232 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5233     if (common->utf) return consumed;
5234 #endif
5235     any = TRUE;
5236     repeat = GET2(cc, 1);
5237     cc += 1 + IMM2_SIZE + 1;
5238     break;
5239 
5240     default:
5241     return consumed;
5242     }
5243 
  /* "Anything" item: mark "repeat" consecutive positions with 255. */
5244   if (any)
5245     {
5246     do
5247       {
5248       chars->count = 255;
5249 
5250       consumed++;
5251       if (--max_chars == 0)
5252         return consumed;
5253       chars++;
5254       }
5255     while (--repeat > 0);
5256 
5257     repeat = 1;
5258     continue;
5259     }
5260 
  /* Character class: "bytes" points at the 32-byte bitmap that follows
  the opcode, and cc is moved to the item after the bitmap. */
5261   if (class)
5262     {
5263     bytes = (sljit_u8*) (cc + 1);
5264     cc += 1 + 32 / sizeof(PCRE2_UCHAR);
5265 
5266     switch (*cc)
5267       {
      /* The class may match zero times: scan what follows it first. */
5268       case OP_CRSTAR:
5269       case OP_CRMINSTAR:
5270       case OP_CRPOSSTAR:
5271       case OP_CRQUERY:
5272       case OP_CRMINQUERY:
5273       case OP_CRPOSQUERY:
5274       max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
5275       if (max_chars == 0)
5276         return consumed;
5277       break;
5278 
5279       default:
5280       case OP_CRPLUS:
5281       case OP_CRMINPLUS:
5282       case OP_CRPOSPLUS:
5283       break;
5284 
      /* Range repeat: the minimum count is used; a minimum of zero ends
      the scan. */
5285       case OP_CRRANGE:
5286       case OP_CRMINRANGE:
5287       case OP_CRPOSRANGE:
5288       repeat = GET2(cc, 1);
5289       if (repeat <= 0)
5290         return consumed;
5291       break;
5292       }
5293 
5294     do
5295       {
      /* If the bit for value 255 is set the position is marked 255;
      otherwise every set bit is added as a possible character. */
5296       if (bytes[31] & 0x80)
5297         chars->count = 255;
5298       else if (chars->count != 255)
5299         {
5300         bytes_end = bytes + 32;
5301         chr = 0;
5302         do
5303           {
5304           byte = *bytes++;
5305           SLJIT_ASSERT((chr & 0x7) == 0);
5306           if (byte == 0)
5307             chr += 8;
5308           else
5309             {
5310             do
5311               {
5312               if ((byte & 0x1) != 0)
5313                 add_prefix_char(chr, chars, TRUE);
5314               byte >>= 1;
5315               chr++;
5316               }
5317             while (byte != 0);
            /* Round chr up to the first value of the next bitmap byte. */
5318             chr = (chr + 7) & ~7;
5319             }
5320           }
5321         while (chars->count != 255 && bytes < bytes_end);
        /* Rewind to the start of the bitmap for the next repetition. */
5322         bytes = bytes_end - 32;
5323         }
5324 
5325       consumed++;
5326       if (--max_chars == 0)
5327         return consumed;
5328       chars++;
5329       }
5330     while (--repeat > 0);
5331 
    /* Decide whether scanning may continue after the class: unbounded
    and unequal-range repeats end the scan here. */
5332     switch (*cc)
5333       {
5334       case OP_CRSTAR:
5335       case OP_CRMINSTAR:
5336       case OP_CRPOSSTAR:
5337       return consumed;
5338 
5339       case OP_CRQUERY:
5340       case OP_CRMINQUERY:
5341       case OP_CRPOSQUERY:
5342       cc++;
5343       break;
5344 
5345       case OP_CRRANGE:
5346       case OP_CRMINRANGE:
5347       case OP_CRPOSRANGE:
5348       if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
5349         return consumed;
5350       cc += 1 + 2 * IMM2_SIZE;
5351       break;
5352       }
5353 
5354     repeat = 1;
5355     continue;
5356     }
5357 
  /* Literal character: len is its length in code units. */
5358   len = 1;
5359 #ifdef SUPPORT_UNICODE
5360   if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
5361 #endif
5362 
5363   if (caseless && char_has_othercase(common, cc))
5364     {
5365 #ifdef SUPPORT_UNICODE
5366     if (common->utf)
5367       {
      /* The scan stops if the other case encodes to a different number
      of code units than the character itself. */
5368       GETCHAR(chr, cc);
5369       if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
5370         return consumed;
5371       }
5372     else
5373 #endif
5374       {
5375       chr = *cc;
5376       othercase[0] = TABLE_GET(chr, common->fcc, chr);
5377       }
5378     }
5379   else
5380     {
5381     caseless = FALSE;
5382     othercase[0] = 0; /* Stops compiler warning - PH */
5383     }
5384 
  /* Record each code unit of the character (and of its other case) at
  consecutive positions, "repeat" times over. The "last" argument of
  add_prefix_char is TRUE only for the final unit of the character. */
5385   len_save = len;
5386   cc_save = cc;
5387   while (TRUE)
5388     {
5389     oc = othercase;
5390     do
5391       {
5392       len--;
5393       consumed++;
5394 
5395       chr = *cc;
5396       add_prefix_char(*cc, chars, len == 0);
5397 
5398       if (caseless)
5399         add_prefix_char(*oc, chars, len == 0);
5400 
5401       if (--max_chars == 0)
5402         return consumed;
5403       chars++;
5404       cc++;
5405       oc++;
5406       }
5407     while (len > 0);
5408 
5409     if (--repeat == 0)
5410       break;
5411 
5412     len = len_save;
5413     cc = cc_save;
5414     }
5415 
5416   repeat = 1;
5417   if (last)
5418     return consumed;
5419   }
5420 }
5421 
5422 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
jumpto_if_not_utf_char_start(struct sljit_compiler * compiler,sljit_s32 reg,struct sljit_label * label)5423 static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
5424 {
5425 #if PCRE2_CODE_UNIT_WIDTH == 8
5426 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
5427 CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0x80, label);
5428 #elif PCRE2_CODE_UNIT_WIDTH == 16
5429 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
5430 CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label);
5431 #else
5432 #error "Unknown code width"
5433 #endif
5434 }
5435 #endif
5436 
5437 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
5438 
5439 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
jump_if_utf_char_start(struct sljit_compiler * compiler,sljit_s32 reg)5440 static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg)
5441 {
5442 #if PCRE2_CODE_UNIT_WIDTH == 8
5443 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
5444 return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0x80);
5445 #elif PCRE2_CODE_UNIT_WIDTH == 16
5446 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
5447 return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00);
5448 #else
5449 #error "Unknown code width"
5450 #endif
5451 }
5452 #endif
5453 
character_to_int32(PCRE2_UCHAR chr)5454 static sljit_s32 character_to_int32(PCRE2_UCHAR chr)
5455 {
5456 sljit_u32 value = chr;
5457 #if PCRE2_CODE_UNIT_WIDTH == 8
5458 #define SSE2_COMPARE_TYPE_INDEX 0
5459 return (sljit_s32)((value << 24) | (value << 16) | (value << 8) | value);
5460 #elif PCRE2_CODE_UNIT_WIDTH == 16
5461 #define SSE2_COMPARE_TYPE_INDEX 1
5462 return (sljit_s32)((value << 16) | value);
5463 #elif PCRE2_CODE_UNIT_WIDTH == 32
5464 #define SSE2_COMPARE_TYPE_INDEX 2
5465 return (sljit_s32)(value);
5466 #else
5467 #error "Unsupported unit width"
5468 #endif
5469 }
5470 
static void load_from_mem_sse2(struct sljit_compiler *compiler, sljit_s32 dst_xmm_reg, sljit_s32 src_general_reg)
{
/* Emits MOVDQA xmm, [reg]: a 16-byte load into XMM register dst_xmm_reg
from the address held in general register src_general_reg. Both
arguments are machine register indexes. dst_xmm_reg must be below 8. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
sljit_u8 instruction[5];
#else
sljit_u8 instruction[4];
#endif

SLJIT_ASSERT(dst_xmm_reg < 8);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if (src_general_reg >= 8)
  {
  /* Registers r8-r15 need a REX.B prefix (0x41) between the 0x66 prefix
  and the opcode, which makes the instruction five bytes long. All five
  bytes must be emitted, otherwise the ModRM byte is lost. */
  instruction[0] = 0x66;
  instruction[1] = 0x41;
  instruction[2] = 0x0f;
  instruction[3] = 0x6f;
  instruction[4] = (dst_xmm_reg << 3) | (src_general_reg & 0x7);
  sljit_emit_op_custom(compiler, instruction, 5);
  return;
  }
#endif

/* MOVDQA xmm1, xmm2/m128 without REX prefix: 66 0F 6F /r */
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = 0x6f;
instruction[3] = (dst_xmm_reg << 3) | src_general_reg;
sljit_emit_op_custom(compiler, instruction, 4);
}
5508 
static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, PCRE2_UCHAR char1, PCRE2_UCHAR char2,
  sljit_u32 bit, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
{
/* Emits the SSE2 sequence that compares the data in XMM register dst_ind
against one or two characters and leaves the per-element result in
dst_ind. Three shapes are produced:
  - char1 == char2:            PCMPEQ dst, cmp1
  - bit != 0:                  POR dst, cmp2 ; PCMPEQ dst, cmp1
  - otherwise (two compares):  MOVDQA tmp, dst ; PCMPEQ dst, cmp1 ;
                               PCMPEQ tmp, cmp2 ; POR dst, tmp
The *_ind arguments are XMM register indexes placed directly into the
ModRM byte. */
sljit_u8 insn[4];

/* Every instruction below starts with the bytes 66 0F. */
insn[0] = 0x66;
insn[1] = 0x0f;

if (char1 != char2 && bit == 0)
  {
  /* MOVDQA xmm1, xmm2/m128 : tmp = dst */
  insn[2] = 0x6f;
  insn[3] = 0xc0 | (tmp_ind << 3) | dst_ind;
  sljit_emit_op_custom(compiler, insn, 4);

  /* PCMPEQB/W/D xmm1, xmm2/m128 : dst = (dst == cmp1) */
  insn[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
  insn[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
  sljit_emit_op_custom(compiler, insn, 4);

  /* Same opcode again : tmp = (tmp == cmp2) */
  insn[3] = 0xc0 | (tmp_ind << 3) | cmp2_ind;
  sljit_emit_op_custom(compiler, insn, 4);

  /* POR xmm1, xmm2/m128 : dst |= tmp */
  insn[2] = 0xeb;
  insn[3] = 0xc0 | (dst_ind << 3) | tmp_ind;
  sljit_emit_op_custom(compiler, insn, 4);
  return;
  }

if (bit != 0)
  {
  /* POR xmm1, xmm2/m128 : dst |= cmp2 */
  insn[2] = 0xeb;
  insn[3] = 0xc0 | (dst_ind << 3) | cmp2_ind;
  sljit_emit_op_custom(compiler, insn, 4);
  }

/* PCMPEQB/W/D xmm1, xmm2/m128 : dst = (dst == cmp1) */
insn[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
insn[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
sljit_emit_op_custom(compiler, insn, 4);
}
5562 
/* Emits an SSE2 accelerated forward search for a code unit that may take one
of two values (char1 or char2; the two may be equal).

The generated code examines the subject in 16 byte chunks: first the 16 byte
aligned chunk that contains STR_PTR (ignoring the bytes before STR_PTR), then
the following aligned chunks. When it falls through, STR_PTR addresses the
first code unit at or after its initial value that is equal to char1 or char2.
When STR_END is reached first, the generated code jumps to
common->failed_match in PCRE2_JIT_COMPLETE mode; in the other modes it falls
through with STR_PTR limited to STR_END.

Arguments:
  common   the JIT compiler state
  char1    first accepted code unit
  char2    second accepted code unit
  offset   only read when UTF checking is compiled in (8 or 16 bit code
           units): the distance in code units between the start of the match
           candidate and the searched code unit. fast_forward_first_char2()
           adds this distance to STR_PTR before calling this function. With
           common->utf set and offset > 0 the code unit at STR_PTR - offset
           must start a character, otherwise the search is resumed one code
           unit later.

The generated code overwrites TMP1 and TMP2 and uses XMM0-XMM3. */

static void fast_forward_first_char2_sse2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *start;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *quit;
struct sljit_jump *partial_quit[2];
sljit_u8 instruction[8];
sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
/* XMM register numbers. Every ModRM byte below is built from these names,
so they are the only place where the register assignment is decided. */
sljit_s32 data_ind = 0;
sljit_s32 tmp_ind = 1;
sljit_s32 cmp1_ind = 2;
sljit_s32 cmp2_ind = 3;
sljit_u32 bit = 0;

/* offset is referenced only inside the conditionally compiled UTF block. */
SLJIT_UNUSED_ARG(offset);

/* Remember the differing bit when the two characters differ in exactly one
bit; bit stays zero otherwise. */
if (char1 != char2)
  {
  bit = char1 ^ char2;
  if (!is_powerof2(bit))
    bit = 0;
  }

partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit[0]);

/* First part (unaligned start) */

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));

/* The ModRM bytes below encode each register in three bits. */
SLJIT_ASSERT(tmp1_ind < 8);

/* MOVD xmm, r/m32 */
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = 0x6e;
instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 4);

if (char1 != char2)
  {
  /* The second operand is the differing bit when there is one, and char2
  otherwise. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));

  /* MOVD xmm, r/m32 */
  instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_ind;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

/* TMP2 keeps the unaligned start address. */
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);

/* PSHUFD xmm1, xmm2/m128, imm8
   An imm8 of zero copies the lowest doubleword into all four doublewords. */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x70;
instruction[3] = 0xc0 | (cmp1_ind << 3) | cmp1_ind;
instruction[4] = 0;
sljit_emit_op_custom(compiler, instruction, 5);

if (char1 != char2)
  {
  /* PSHUFD xmm1, xmm2/m128, imm8 */
  instruction[3] = 0xc0 | (cmp2_ind << 3) | cmp2_ind;
  sljit_emit_op_custom(compiler, instruction, 5);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
restart = LABEL();
#endif
/* Split the start address: STR_PTR becomes the address of the aligned
chunk, TMP2 the byte offset of the start inside that chunk. */
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);

load_from_mem_sse2(compiler, data_ind, str_ptr_ind);
fast_forward_char_pair_sse2_compare(compiler, char1, char2, bit, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

/* PMOVMSKB reg, xmm */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_ind << 3) | data_ind;
sljit_emit_op_custom(compiler, instruction, 4);

/* Go back to the unaligned address and drop the mask bits which belong to
the bytes before it. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);

/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 3);
sljit_set_current_flags(compiler, SLJIT_SET_Z);

/* The zero flag is clear when BSF has found a set bit. */
quit = JUMP(SLJIT_NOT_ZERO);

/* Nothing in the first chunk: return to its aligned address. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

start = LABEL();
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);

partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit[1]);

/* Second part (aligned) */

load_from_mem_sse2(compiler, data_ind, str_ptr_ind);
fast_forward_char_pair_sse2_compare(compiler, char1, char2, bit, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

/* PMOVMSKB reg, xmm */
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_ind << 3) | data_ind;
sljit_emit_op_custom(compiler, instruction, 4);

/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 3);
sljit_set_current_flags(compiler, SLJIT_SET_Z);

JUMPTO(SLJIT_ZERO, start);

JUMPHERE(quit);
/* TMP1 is the byte distance of the first matching code unit. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

if (common->mode != PCRE2_JIT_COMPLETE)
  {
  JUMPHERE(partial_quit[0]);
  JUMPHERE(partial_quit[1]);
  /* STR_PTR = STR_END when STR_PTR is above STR_END. */
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
  CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
  }
else
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);

  /* Inspect the code unit where the match candidate starts. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));

  quit = jump_if_utf_char_start(compiler, TMP1);

  /* Not a character start: search again from the next code unit. TMP2 is
  reloaded because the code at restart expects the start address there. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, restart);

  JUMPHERE(quit);
  }
#endif
}
5722 
5723 #ifndef _WIN64
5724 
max_fast_forward_char_pair_sse2_offset(void)5725 static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_sse2_offset(void)
5726 {
5727 #if PCRE2_CODE_UNIT_WIDTH == 8
5728 return 15;
5729 #elif PCRE2_CODE_UNIT_WIDTH == 16
5730 return 7;
5731 #elif PCRE2_CODE_UNIT_WIDTH == 32
5732 return 3;
5733 #else
5734 #error "Unsupported unit width"
5735 #endif
5736 }
5737 
/* Emits an SSE2 accelerated forward search for two code units which are
located at two different offsets from the start of a match candidate.

The generated code searches forward for the first position P >= STR_PTR
where the code unit at P + offs1 is char1a or char1b and the code unit at
P + offs2 is char2a or char2b. When it falls through, STR_PTR is P; when the
end of the searched area is reached first it jumps to common->failed_match.
With UTF checking compiled in (8 or 16 bit code units) and common->utf set,
the code unit at P must also start a character, otherwise the search is
resumed one code unit later.

The subject is processed in 16 byte aligned chunks. For every chunk, data1
holds the chunk itself and data2 is assembled so that each of its bytes is
the subject byte located (offs1 - offs2) code units before the byte at the
same index of data1. Comparing data1 with the first character pair, data2
with the second one and AND-ing the results gives the candidates.

Arguments:
  common   the JIT compiler state; the mode must be PCRE2_JIT_COMPLETE
  offs1    offset of the first searched code unit; must be above offs2
  char1a   first accepted value at offs1
  char1b   second accepted value at offs1 (may be equal to char1a)
  offs2    offset of the second searched code unit; offs1 - offs2 must not
           exceed max_fast_forward_char_pair_sse2_offset()
  char2a   first accepted value at offs2
  char2b   second accepted value at offs2 (may be equal to char2a)

The generated code overwrites TMP1 and TMP2, uses XMM0-XMM6, and uses TMP3 to
preserve STR_END when common->match_end_ptr is set. */

static void fast_forward_char_pair_sse2(compiler_common *common, sljit_s32 offs1,
  PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
{
DEFINE_COMPILER;
sljit_u32 bit1 = 0;
sljit_u32 bit2 = 0;
/* Distance of the two searched code units in bytes. */
sljit_u32 diff = IN_UCHARS(offs1 - offs2);
sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
sljit_s32 tmp2_ind = sljit_get_register_index(TMP2);
sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
/* XMM register numbers. */
sljit_s32 data1_ind = 0;
sljit_s32 data2_ind = 1;
sljit_s32 tmp_ind = 2;
sljit_s32 cmp1a_ind = 3;
sljit_s32 cmp1b_ind = 4;
sljit_s32 cmp2a_ind = 5;
sljit_s32 cmp2b_ind = 6;
struct sljit_label *start;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *jump[2];

sljit_u8 instruction[8];

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_sse2_offset()));
SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);

/* Initialize. */
if (common->match_end_ptr != 0)
  {
  /* Save STR_END in TMP3, then lower STR_END to the stored match end plus
  offs1 + 1 code units when that address is below it. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));

  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
  CMOV(SLJIT_LESS, STR_END, TMP1, 0);
  }

/* From here on STR_PTR addresses the code unit at offs1. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

/* MOVD xmm, r/m32 */
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = 0x6e;

/* Operands of the first pair. When both characters are the same only the
"a" register is loaded. Otherwise the "b" register receives either the
single differing bit or the second character. */
if (char1a == char1b)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
else
  {
  bit1 = char1a ^ char1b;
  if (is_powerof2(bit1))
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1));
    }
  else
    {
    bit1 = 0;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b));
    }
  }

instruction[3] = 0xc0 | (cmp1a_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 4);

if (char1a != char1b)
  {
  instruction[3] = 0xc0 | (cmp1b_ind << 3) | tmp2_ind;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

/* Operands of the second pair, set up the same way. */
if (char2a == char2b)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
else
  {
  bit2 = char2a ^ char2b;
  if (is_powerof2(bit2))
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2));
    }
  else
    {
    bit2 = 0;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b));
    }
  }

instruction[3] = 0xc0 | (cmp2a_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 4);

if (char2a != char2b)
  {
  instruction[3] = 0xc0 | (cmp2b_ind << 3) | tmp2_ind;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

/* PSHUFD xmm1, xmm2/m128, imm8
   An imm8 of zero copies the lowest doubleword into all four doublewords. */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x70;
instruction[4] = 0;

instruction[3] = 0xc0 | (cmp1a_ind << 3) | cmp1a_ind;
sljit_emit_op_custom(compiler, instruction, 5);

if (char1a != char1b)
  {
  instruction[3] = 0xc0 | (cmp1b_ind << 3) | cmp1b_ind;
  sljit_emit_op_custom(compiler, instruction, 5);
  }

instruction[3] = 0xc0 | (cmp2a_ind << 3) | cmp2a_ind;
sljit_emit_op_custom(compiler, instruction, 5);

if (char2a != char2b)
  {
  instruction[3] = 0xc0 | (cmp2b_ind << 3) | cmp2b_ind;
  sljit_emit_op_custom(compiler, instruction, 5);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
restart = LABEL();
#endif

/* TMP1: aligned chunk of the code unit at offs2, STR_PTR: aligned chunk of
the code unit at offs1, TMP2: the unaligned address of the latter. */
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1 - offs2));
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, ~0xf);

load_from_mem_sse2(compiler, data1_ind, str_ptr_ind);

/* Both code units are in the same chunk: data2 can be derived from data1
alone. */
jump[0] = CMP(SLJIT_EQUAL, STR_PTR, 0, TMP1, 0);

/* Different chunks: data2 = (previous chunk >> (16 - diff) bytes)
                           | (current chunk << diff bytes). */
load_from_mem_sse2(compiler, data2_ind, tmp1_ind);

/* MOVDQA xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x6f;
instruction[3] = 0xc0 | (tmp_ind << 3) | data1_ind;
sljit_emit_op_custom(compiler, instruction, 4);

/* PSLLDQ xmm1, imm8 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x73;
instruction[3] = 0xc0 | (7 << 3) | tmp_ind;
instruction[4] = diff;
sljit_emit_op_custom(compiler, instruction, 5);

/* PSRLDQ xmm1, imm8 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
/* instruction[2] = 0x73; */
instruction[3] = 0xc0 | (3 << 3) | data2_ind;
instruction[4] = 16 - diff;
sljit_emit_op_custom(compiler, instruction, 5);

/* POR xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xeb;
instruction[3] = 0xc0 | (data2_ind << 3) | tmp_ind;
sljit_emit_op_custom(compiler, instruction, 4);

jump[1] = JUMP(SLJIT_JUMP);

JUMPHERE(jump[0]);

/* Same chunk: data2 = data1 << diff bytes. */

/* MOVDQA xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x6f;
instruction[3] = 0xc0 | (data2_ind << 3) | data1_ind;
sljit_emit_op_custom(compiler, instruction, 4);

/* PSLLDQ xmm1, imm8 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x73;
instruction[3] = 0xc0 | (7 << 3) | data2_ind;
instruction[4] = diff;
sljit_emit_op_custom(compiler, instruction, 5);

JUMPHERE(jump[1]);

/* TMP2 becomes the byte offset of the start inside its chunk. */
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);

fast_forward_char_pair_sse2_compare(compiler, char2a, char2b, bit2, data2_ind, cmp2a_ind, cmp2b_ind, tmp_ind);
fast_forward_char_pair_sse2_compare(compiler, char1a, char1b, bit1, data1_ind, cmp1a_ind, cmp1b_ind, tmp_ind);

/* PAND xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xdb;
instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind;
sljit_emit_op_custom(compiler, instruction, 4);

/* PMOVMSKB reg, xmm */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_ind << 3) | data1_ind;
sljit_emit_op_custom(compiler, instruction, 4);

/* Ignore matches before the first STR_PTR. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);

/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 3);
sljit_set_current_flags(compiler, SLJIT_SET_Z);

/* The zero flag is clear when BSF has found a set bit. */
jump[0] = JUMP(SLJIT_NOT_ZERO);

/* Nothing in the first chunk: return to its aligned address. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Main loop. */
instruction[0] = 0x66;
instruction[1] = 0x0f;

start = LABEL();

/* The chunk which is left behind supplies the low bytes of data2. */
load_from_mem_sse2(compiler, data2_ind, str_ptr_ind);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

load_from_mem_sse2(compiler, data1_ind, str_ptr_ind);

/* PSRLDQ xmm1, imm8 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x73;
instruction[3] = 0xc0 | (3 << 3) | data2_ind;
instruction[4] = 16 - diff;
sljit_emit_op_custom(compiler, instruction, 5);

/* MOVDQA xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x6f;
instruction[3] = 0xc0 | (tmp_ind << 3) | data1_ind;
sljit_emit_op_custom(compiler, instruction, 4);

/* PSLLDQ xmm1, imm8 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x73;
instruction[3] = 0xc0 | (7 << 3) | tmp_ind;
instruction[4] = diff;
sljit_emit_op_custom(compiler, instruction, 5);

/* POR xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xeb;
instruction[3] = 0xc0 | (data2_ind << 3) | tmp_ind;
sljit_emit_op_custom(compiler, instruction, 4);

fast_forward_char_pair_sse2_compare(compiler, char1a, char1b, bit1, data1_ind, cmp1a_ind, cmp1b_ind, tmp_ind);
fast_forward_char_pair_sse2_compare(compiler, char2a, char2b, bit2, data2_ind, cmp2a_ind, cmp2b_ind, tmp_ind);

/* PAND xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xdb;
instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind;
sljit_emit_op_custom(compiler, instruction, 4);

/* PMOVMSKB reg, xmm */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_ind << 3) | data1_ind;
sljit_emit_op_custom(compiler, instruction, 4);

/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 3);
sljit_set_current_flags(compiler, SLJIT_SET_Z);

JUMPTO(SLJIT_ZERO, start);

JUMPHERE(jump[0]);

/* TMP1 is the byte distance of the first candidate. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

/* STR_END is reloaded with the stored match end here; the original value
is restored from TMP3 at the end of the function. */
if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
  {
  /* Inspect the code unit where the match candidate starts. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));

  jump[0] = jump_if_utf_char_start(compiler, TMP1);

  /* Not a character start: search again from the next code unit, or give
  up when STR_END has been reached. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart);

  add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP));

  JUMPHERE(jump[0]);
  }
#endif

/* Back to the start of the match candidate. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
6063 
check_fast_forward_char_pair_sse2(compiler_common * common,fast_forward_char_data * chars,int max)6064 static BOOL check_fast_forward_char_pair_sse2(compiler_common *common, fast_forward_char_data *chars, int max)
6065 {
6066 sljit_s32 i, j, priority, count;
6067 sljit_u32 priorities;
6068 PCRE2_UCHAR a1, a2, b1, b2;
6069 
6070 priorities = 0;
6071 
6072 count = 0;
6073 for (i = 0; i < max; i++)
6074   {
6075   if (chars[i].last_count > 2)
6076     {
6077     SLJIT_ASSERT(chars[i].last_count <= 7);
6078 
6079     priorities |= (1 << chars[i].last_count);
6080     count++;
6081     }
6082   }
6083 
6084 if (count < 2)
6085   return FALSE;
6086 
6087 for (priority = 7; priority > 2; priority--)
6088   {
6089   if ((priorities & (1 << priority)) == 0)
6090     continue;
6091 
6092   for (i = max - 1; i >= 1; i--)
6093     if (chars[i].last_count >= priority)
6094       {
6095       SLJIT_ASSERT(chars[i].count <= 2 && chars[i].count >= 1);
6096 
6097       a1 = chars[i].chars[0];
6098       a2 = chars[i].chars[1];
6099 
6100       j = i - max_fast_forward_char_pair_sse2_offset();
6101       if (j < 0)
6102         j = 0;
6103 
6104       while (j < i)
6105         {
6106         if (chars[j].last_count >= priority)
6107           {
6108           b1 = chars[j].chars[0];
6109           b2 = chars[j].chars[1];
6110 
6111           if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)
6112             {
6113             fast_forward_char_pair_sse2(common, i, a1, a2, j, b1, b2);
6114             return TRUE;
6115             }
6116           }
6117         j++;
6118         }
6119       }
6120   }
6121 
6122 return FALSE;
6123 }
6124 
6125 #endif
6126 
6127 #undef SSE2_COMPARE_TYPE_INDEX
6128 
6129 #endif
6130 
/* Emits a forward search for the first match candidate whose code unit at
the given offset is char1 or char2 (the two may be equal).

Arguments:
  common   the JIT compiler state
  char1    first accepted code unit
  char2    second accepted code unit
  offset   distance of the searched code unit from the start of the match
           candidate, in code units; must be zero unless the mode is
           PCRE2_JIT_COMPLETE

When common->match_end_ptr is set, STR_END is temporarily lowered to the
stored match end plus offset + 1 code units (if that is below STR_END) and
restored from TMP3 afterwards. On x86 builds without SUPPORT_VALGRIND the
search itself is delegated to fast_forward_first_char2_sse2() when the CPU
reports SSE2; otherwise a code unit by code unit loop is emitted. */

static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *is_char1;
struct sljit_jump *end_reached;
PCRE2_UCHAR diff_bits;
BOOL limit_to_match_end = (common->match_end_ptr != 0);

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);

if (limit_to_match_end)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

/* STR_PTR addresses the searched code unit during the search. */
if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (limit_to_match_end)
  {
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  /* STR_END = TMP1 when STR_END is above TMP1. */
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }

#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)

/* Vectorized variant of the search. */

if (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
  {
  fast_forward_first_char2_sse2(common, char1, char2, offset);

  /* Undo the adjustments made above. */
  if (offset > 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

  if (limit_to_match_end)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

/* Generic variant: one code unit per iteration. */

loop = LABEL();

end_reached = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, end_reached);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Zero means that the two characters are the same. */
diff_bits = char1 ^ char2;

if (diff_bits == 0)
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, loop);
else if (is_powerof2(diff_bits))
  {
  /* The characters differ in one bit: force that bit to one and do a
  single comparison. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, diff_bits);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | diff_bits, loop);
  }
else
  {
  /* Two independent comparisons. */
  is_char1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, loop);
  JUMPHERE(is_char1);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  /* STR_PTR is one code unit past the searched one here, so this reads the
  code unit where the match candidate starts. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
  jumpto_if_not_utf_char_start(compiler, TMP1, loop);
  }
#endif

/* Back to the start of the match candidate. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));

/* Outside PCRE2_JIT_COMPLETE mode the end-of-subject exit lands here, after
the subtraction, so STR_PTR is left as it was when the end was detected. */
if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(end_reached);

if (limit_to_match_end)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
6218 
fast_forward_first_n_chars(compiler_common * common)6219 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
6220 {
6221 DEFINE_COMPILER;
6222 struct sljit_label *start;
6223 struct sljit_jump *match;
6224 fast_forward_char_data chars[MAX_N_CHARS];
6225 sljit_s32 offset;
6226 PCRE2_UCHAR mask;
6227 PCRE2_UCHAR *char_set, *char_set_end;
6228 int i, max, from;
6229 int range_right = -1, range_len;
6230 sljit_u8 *update_table = NULL;
6231 BOOL in_range;
6232 sljit_u32 rec_count;
6233 
6234 for (i = 0; i < MAX_N_CHARS; i++)
6235   {
6236   chars[i].count = 0;
6237   chars[i].last_count = 0;
6238   }
6239 
6240 rec_count = 10000;
6241 max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
6242 
6243 if (max < 1)
6244   return FALSE;
6245 
6246 /* Convert last_count to priority. */
6247 for (i = 0; i < max; i++)
6248   {
6249   SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count);
6250 
6251   if (chars[i].count == 1)
6252     {
6253     chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
6254     /* Simplifies algorithms later. */
6255     chars[i].chars[1] = chars[i].chars[0];
6256     }
6257   else if (chars[i].count == 2)
6258     {
6259     SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);
6260 
6261     if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
6262       chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
6263     else
6264       chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
6265     }
6266   else
6267     chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
6268   }
6269 
6270 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND) && !(defined _WIN64)
6271 if (sljit_has_cpu_feature(SLJIT_HAS_SSE2) && check_fast_forward_char_pair_sse2(common, chars, max))
6272   return TRUE;
6273 #endif
6274 
6275 in_range = FALSE;
6276 /* Prevent compiler "uninitialized" warning */
6277 from = 0;
6278 range_len = 4 /* minimum length */ - 1;
6279 for (i = 0; i <= max; i++)
6280   {
6281   if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
6282     {
6283     range_len = i - from;
6284     range_right = i - 1;
6285     }
6286 
6287   if (i < max && chars[i].count < 255)
6288     {
6289     SLJIT_ASSERT(chars[i].count > 0);
6290     if (!in_range)
6291       {
6292       in_range = TRUE;
6293       from = i;
6294       }
6295     }
6296   else
6297     in_range = FALSE;
6298   }
6299 
6300 if (range_right >= 0)
6301   {
6302   update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
6303   if (update_table == NULL)
6304     return TRUE;
6305   memset(update_table, IN_UCHARS(range_len), 256);
6306 
6307   for (i = 0; i < range_len; i++)
6308     {
6309     SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);
6310 
6311     char_set = chars[range_right - i].chars;
6312     char_set_end = char_set + chars[range_right - i].count;
6313     do
6314       {
6315       if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
6316         update_table[(*char_set) & 0xff] = IN_UCHARS(i);
6317       char_set++;
6318       }
6319     while (char_set < char_set_end);
6320     }
6321   }
6322 
6323 offset = -1;
6324 /* Scan forward. */
6325 for (i = 0; i < max; i++)
6326   {
6327   if (range_right == i)
6328     continue;
6329 
6330   if (offset == -1)
6331     {
6332     if (chars[i].last_count >= 2)
6333       offset = i;
6334     }
6335   else if (chars[offset].last_count < chars[i].last_count)
6336     offset = i;
6337   }
6338 
6339 SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));
6340 
6341 if (range_right < 0)
6342   {
6343   if (offset < 0)
6344     return FALSE;
6345   /* Works regardless the value is 1 or 2. */
6346   fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
6347   return TRUE;
6348   }
6349 
6350 SLJIT_ASSERT(range_right != offset);
6351 
6352 if (common->match_end_ptr != 0)
6353   {
6354   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6355   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6356   OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6357   OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
6358   CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
6359   }
6360 else
6361   OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6362 
6363 SLJIT_ASSERT(range_right >= 0);
6364 
6365 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
6366 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
6367 #endif
6368 
6369 start = LABEL();
6370 add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6371 
6372 #if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
6373 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
6374 #else
6375 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
6376 #endif
6377 
6378 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
6379 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
6380 #else
6381 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
6382 #endif
6383 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6384 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
6385 
6386 if (offset >= 0)
6387   {
6388   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
6389   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6390 
6391   if (chars[offset].count == 1)
6392     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
6393   else
6394     {
6395     mask = chars[offset].chars[0] ^ chars[offset].chars[1];
6396     if (is_powerof2(mask))
6397       {
6398       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
6399       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
6400       }
6401     else
6402       {
6403       match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
6404       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
6405       JUMPHERE(match);
6406       }
6407     }
6408   }
6409 
6410 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6411 if (common->utf && offset != 0)
6412   {
6413   if (offset < 0)
6414     {
6415     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6416     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6417     }
6418   else
6419     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6420 
6421   jumpto_if_not_utf_char_start(compiler, TMP1, start);
6422 
6423   if (offset < 0)
6424     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6425   }
6426 #endif
6427 
6428 if (offset >= 0)
6429   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6430 
6431 if (common->match_end_ptr != 0)
6432   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6433 else
6434   OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6435 return TRUE;
6436 }
6437 
fast_forward_first_char(compiler_common * common)6438 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
6439 {
6440 PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
6441 PCRE2_UCHAR oc;
6442 
6443 oc = first_char;
6444 if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
6445   {
6446   oc = TABLE_GET(first_char, common->fcc, first_char);
6447 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
6448   if (first_char > 127 && common->utf)
6449     oc = UCD_OTHERCASE(first_char);
6450 #endif
6451   }
6452 
6453 fast_forward_first_char2(common, first_char, oc, 0);
6454 }
6455 
fast_forward_newline(compiler_common * common)6456 static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
6457 {
6458 DEFINE_COMPILER;
6459 struct sljit_label *loop;
6460 struct sljit_jump *lastchar;
6461 struct sljit_jump *firstchar;
6462 struct sljit_jump *quit;
6463 struct sljit_jump *foundcr = NULL;
6464 struct sljit_jump *notfoundnl;
6465 jump_list *newline = NULL;
6466 
6467 if (common->match_end_ptr != 0)
6468   {
6469   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6470   OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6471   }
6472 
6473 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6474   {
6475   lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6476   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6477   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6478   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6479   firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6480 
6481   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
6482   OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
6483   OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
6484 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6485   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
6486 #endif
6487   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6488 
6489   loop = LABEL();
6490   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6491   quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6492   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6493   OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6494   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
6495   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
6496 
6497   JUMPHERE(quit);
6498   JUMPHERE(firstchar);
6499   JUMPHERE(lastchar);
6500 
6501   if (common->match_end_ptr != 0)
6502     OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6503   return;
6504   }
6505 
6506 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6507 /* Example: match /^/ to \r\n from offset 1. */
6508 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6509 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6510 move_back(common, NULL, FALSE);
6511 
6512 loop = LABEL();
6513 common->ff_newline_shortcut = loop;
6514 
6515 read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
6516 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6517 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6518   foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6519 check_newlinechar(common, common->nltype, &newline, FALSE);
6520 set_jumps(newline, loop);
6521 
6522 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6523   {
6524   quit = JUMP(SLJIT_JUMP);
6525   JUMPHERE(foundcr);
6526   notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6527   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6528   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
6529   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
6530 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6531   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6532 #endif
6533   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6534   JUMPHERE(notfoundnl);
6535   JUMPHERE(quit);
6536   }
6537 JUMPHERE(lastchar);
6538 JUMPHERE(firstchar);
6539 
6540 if (common->match_end_ptr != 0)
6541   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6542 }
6543 
6544 static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6545 
fast_forward_start_bits(compiler_common * common)6546 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
6547 {
6548 DEFINE_COMPILER;
6549 const sljit_u8 *start_bits = common->re->start_bitmap;
6550 struct sljit_label *start;
6551 struct sljit_jump *partial_quit;
6552 #if PCRE2_CODE_UNIT_WIDTH != 8
6553 struct sljit_jump *found = NULL;
6554 #endif
6555 jump_list *matches = NULL;
6556 
6557 if (common->match_end_ptr != 0)
6558   {
6559   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6560   OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
6561   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6562   OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
6563   CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
6564   }
6565 
6566 start = LABEL();
6567 
6568 partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6569 if (common->mode == PCRE2_JIT_COMPLETE)
6570   add_jump(compiler, &common->failed_match, partial_quit);
6571 
6572 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6573 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6574 
6575 if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
6576   {
6577 #if PCRE2_CODE_UNIT_WIDTH != 8
6578   if ((start_bits[31] & 0x80) != 0)
6579     found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
6580   else
6581     CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
6582 #elif defined SUPPORT_UNICODE
6583   if (common->utf && is_char7_bitset(start_bits, FALSE))
6584     CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
6585 #endif
6586   OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6587   OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6588   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
6589   if (sljit_get_register_index(TMP3) >= 0)
6590     {
6591     OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
6592     OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP3, 0);
6593     }
6594   else
6595     {
6596     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6597     OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6598     }
6599   JUMPTO(SLJIT_ZERO, start);
6600   }
6601 else
6602   set_jumps(matches, start);
6603 
6604 #if PCRE2_CODE_UNIT_WIDTH != 8
6605 if (found != NULL)
6606   JUMPHERE(found);
6607 #endif
6608 
6609 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6610 
6611 if (common->mode != PCRE2_JIT_COMPLETE)
6612   JUMPHERE(partial_quit);
6613 
6614 if (common->match_end_ptr != 0)
6615   OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
6616 }
6617 
search_requested_char(compiler_common * common,PCRE2_UCHAR req_char,BOOL caseless,BOOL has_firstchar)6618 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
6619 {
6620 DEFINE_COMPILER;
6621 struct sljit_label *loop;
6622 struct sljit_jump *toolong;
6623 struct sljit_jump *alreadyfound;
6624 struct sljit_jump *found;
6625 struct sljit_jump *foundoc = NULL;
6626 struct sljit_jump *notfound;
6627 sljit_u32 oc, bit;
6628 
6629 SLJIT_ASSERT(common->req_char_ptr != 0);
6630 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
6631 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_CU_MAX);
6632 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
6633 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
6634 
6635 if (has_firstchar)
6636   OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6637 else
6638   OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
6639 
6640 loop = LABEL();
6641 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
6642 
6643 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
6644 oc = req_char;
6645 if (caseless)
6646   {
6647   oc = TABLE_GET(req_char, common->fcc, req_char);
6648 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
6649   if (req_char > 127 && common->utf)
6650     oc = UCD_OTHERCASE(req_char);
6651 #endif
6652   }
6653 if (req_char == oc)
6654   found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6655 else
6656   {
6657   bit = req_char ^ oc;
6658   if (is_powerof2(bit))
6659     {
6660     OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
6661     found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
6662     }
6663   else
6664     {
6665     found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6666     foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
6667     }
6668   }
6669 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6670 JUMPTO(SLJIT_JUMP, loop);
6671 
6672 JUMPHERE(found);
6673 if (foundoc)
6674   JUMPHERE(foundoc);
6675 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
6676 JUMPHERE(alreadyfound);
6677 JUMPHERE(toolong);
6678 return notfound;
6679 }
6680 
/* Generates the fast-called helper that unwinds saved values from the stack
addressed by STACK_TOP. The word just below STACK_TOP selects the action:

  > 0  it is an offset relative to the address produced by GET_LOCAL_BASE;
       the two words below it are stored at that location and three words
       are popped.
  < 0  its negation is such an offset; the one word below it is stored there
       and two words are popped.
  = 0  unwinding is finished and the helper returns through RETURN_ADDR.

TMP1, TMP2 (and TMP3 when it is a real register) are used as scratch. */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
const BOOL tmp3_is_reg = sljit_get_register_index(TMP3) >= 0;
struct sljit_jump *not_pair;
struct sljit_jump *single;
struct sljit_label *next_entry;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
GET_LOCAL_BASE(TMP1, 0, 0);

/* Process entries until the terminating zero is found. */
next_entry = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
not_pair = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);

/* Positive offset: restore two consecutive words. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (tmp3_is_reg)
  {
  /* Go through registers; TMP1 is borrowed, so the base is reloaded. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  GET_LOCAL_BASE(TMP1, 0, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
  }
else
  {
  /* Memory to memory moves. */
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
  }
JUMPTO(SLJIT_JUMP, next_entry);

JUMPHERE(not_pair);
/* TMP2 <= 0 here, so "not zero" means "negative". */
single = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
/* Zero marker: nothing more to restore. */
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Negative offset: restore a single word. */
JUMPHERE(single);
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (tmp3_is_reg)
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
  }
JUMPTO(SLJIT_JUMP, next_entry);
}
6734 
/* Generates the fast-called helper behind the word boundary assertions.

The emitted code classifies the character before STR_PTR (result in TMP3,
left at 0 when STR_PTR is at or before the "begin" pointer of the arguments)
and the character at STR_PTR (result in TMP2, left at 0 at the end of the
subject). Each result is 1 for a word character and 0 otherwise. The helper
returns with TMP2 = TMP2 XOR TMP3 and the zero flag set from that operation.
When common->invalid_utf is set, a second exit is generated for the invalid
UTF jumps; it returns with TMP2 = -1. The return address is parked in LOCALS0
and LOCALS1 is passed to peek_char() as its backup location. */
static void check_wordboundary(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *at_subject_start;
jump_list *at_subject_end = NULL;
jump_list *bad_utf = NULL;
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
struct sljit_jump *skip;
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */

SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);

/* First half: type of the previous character, collected in TMP3. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
at_subject_start = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

if (common->mode != PCRE2_JIT_COMPLETE)
  {
  move_back(common, &bad_utf, FALSE);
  check_start_used_ptr(common);
  /* A precise read is unnecessary because the match fails anyway. */
  read_char(common, 0, READ_CHAR_MAX, &bad_utf, READ_CHAR_UPDATE_STR_PTR);
  }
else
  peek_char_back(common, READ_CHAR_MAX, &bad_utf);

#ifdef SUPPORT_UNICODE
if (common->use_ucp)
  {
  /* Underscore counts as a word character; otherwise the type returned by
  the getucdtype helper is tested against the ucp_Ll..ucp_Lu and
  ucp_Nd..ucp_No ranges. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  skip = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(skip);
  OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* Table lookup in common->ctypes; values above 255 skip the lookup and
  keep the zero that is already in TMP3. */
#if PCRE2_CODE_UNIT_WIDTH != 8
  skip = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  skip = common->utf ? CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255) : NULL;
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(skip);
#elif defined SUPPORT_UNICODE
  if (skip != NULL)
    JUMPHERE(skip);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
JUMPHERE(at_subject_start);

/* Second half: type of the current character, collected in TMP2. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &at_subject_end);
peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &bad_utf);

/* Same classification as above, repeated for the current character. */
#ifdef SUPPORT_UNICODE
if (common->use_ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  skip = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(skip);
  }
else
#endif /* SUPPORT_UNICODE */
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* peek_char may have overwritten TMP2, so it is cleared again. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  skip = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  skip = common->utf ? CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255) : NULL;
#endif
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(skip);
#elif defined SUPPORT_UNICODE
  if (skip != NULL)
    JUMPHERE(skip);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
set_jumps(at_subject_end, LABEL());

/* Combine both results: non-zero exactly when they differ. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
sljit_emit_fast_return(compiler, TMP1, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  SLJIT_ASSERT(bad_utf != NULL);

  /* Separate exit for invalid UTF input. */
  set_jumps(bad_utf, LABEL());
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
  sljit_emit_fast_return(compiler, TMP1, 0);
  }
#endif /* SUPPORT_UNICODE */
}
6864 
/* Tries to express a 256 bit character class as at most two ranges and emits
the corresponding comparisons on TMP1 (which may be destroyed).

The bitmap is converted into a list of "edges": the character values at which
the bit value changes, with 256 appended when the class still has to be
closed according to nclass. Lists of up to four edges are turned into
compare-and-jump sequences whose failing jumps are added to *backtracks;
invert flips which side of the class fails.

Returns TRUE when code was emitted (or none was needed), FALSE when the class
needs more edges than supported and nothing was emitted. */
static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
DEFINE_COMPILER;
int edges[MAX_CLASS_RANGE_SIZE];
sljit_u8 state, cur, uniform;
int c, shift, count = 0;

state = bits[0] & 0x1;
/* A bitmap byte equal to this value holds eight bits in the current state. */
uniform = (sljit_u8)(state != 0 ? 0xff : 0x00);

c = 0;
while (c < 256)
  {
  if ((c & 0x7) == 0 && bits[c >> 3] == uniform)
    {
    /* Whole byte without a change: skip it at once. */
    c += 8;
    continue;
    }

  cur = (bits[c >> 3] >> (c & 0x7)) & 0x1;
  if (cur != state)
    {
    if (count >= MAX_CLASS_RANGE_SIZE)
      return FALSE;
    edges[count++] = c;
    state = cur;
    uniform = (sljit_u8)(cur != 0 ? 0xff : 0x00);
    }
  c++;
  }

if (nclass ? (state == 0) : (state == 1))
  {
  if (count >= MAX_CLASS_RANGE_SIZE)
    return FALSE;
  edges[count++] = 256;
  }

if (count > 4 || count < 0)
  return FALSE;

/* From here on "state" is the value of the class at character 0. */
state = bits[0] & 0x1;
if (invert)
  state ^= 0x1;

switch(count)
  {
  case 0:
  /* Either every character is accepted, or none at all. */
  if (state == 0)
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  return TRUE;

  case 1:
  add_jump(compiler, backtracks, CMP(state == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[0]));
  return TRUE;

  case 2:
  if (edges[0] + 1 == edges[1])
    add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, edges[0]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[0]);
    add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]));
    }
  return TRUE;

  case 3:
  if (state != 0)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[2]));
    if (edges[0] + 1 == edges[1])
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[0]));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]));
      }
    return TRUE;
    }

  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[0]));
  if (edges[1] + 1 == edges[2])
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[1]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[1]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[2] - edges[1]));
    }
  return TRUE;

  case 4:
  /* Two ranges of equal length whose starts differ in exactly one bit can
  be merged by forcing that bit on. */
  if ((edges[1] - edges[0]) == (edges[3] - edges[2])
      && (edges[0] | (edges[2] - edges[0])) == edges[2]
      && (edges[1] & (edges[2] - edges[0])) == 0
      && is_powerof2(edges[2] - edges[0]))
    {
    SLJIT_ASSERT((edges[0] & (edges[2] - edges[0])) == 0 && (edges[2] & edges[3] & (edges[2] - edges[0])) != 0);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[2] - edges[0]);
    if (edges[2] + 1 == edges[3])
      add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, edges[2]));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[2]);
      add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[3] - edges[2]));
      }
    return TRUE;
    }

  if (state != 0)
    {
    /* "shift" tracks how much has already been subtracted from TMP1. */
    shift = 0;
    if (edges[0] + 1 == edges[1])
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[0]));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]));
      shift = edges[0];
      }

    if (edges[2] + 1 == edges[3])
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[2] - shift));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[2] - shift);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[3] - edges[2]));
      }
    return TRUE;
    }

  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[0]);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[3] - edges[0]));
  if (edges[1] + 1 == edges[2])
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[2] - edges[1]));
    }
  return TRUE;

  default:
  SLJIT_UNREACHABLE();
  return FALSE;
  }
}
7016 
/* Tries to express a 256 bit character class as a short list of individual
characters and emits a branch-free membership test for it. TMP1 may be
destroyed and TMP2 receives the result of the test.

The set bits (the clear bits when nclass is set) are collected into a list of
at most MAX_CLASS_CHARS_SIZE entries. A character with bit 0x20 set whose
counterpart without that bit is already listed is merged into that entry,
which is marked with 0x100. The emitted code compares TMP1 with each plain
entry, then ORs 0x20 into TMP1 and compares it with the merged entries;
every hit moves a non-zero value into TMP2. The final jump added to
*backtracks tests TMP2 against zero, with the sense selected by nclass and
invert.

Returns FALSE without emitting anything when conditional moves are not
available, when the list would overflow or when it is empty; TRUE otherwise. */
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
DEFINE_COMPILER;
uint16_t members[MAX_CLASS_CHARS_SIZE];
uint8_t pending;
sljit_s32 cond;
int idx, bitpos, probe, count, ch, folded;
BOOL merged;

if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  return FALSE;

count = 0;

for (idx = 0; idx < 32; idx++)
  {
  pending = nclass ? (uint8_t)~bits[idx] : bits[idx];

  for (bitpos = 0; pending != 0; pending >>= 1, bitpos++)
    {
    if ((pending & 0x1) == 0)
      continue;

    ch = idx * 8 + bitpos;
    merged = FALSE;

    if ((ch & 0x20) != 0)
      {
      /* Look for the entry that differs only in bit 0x20. */
      for (probe = 0; probe < count; probe++)
        if (members[probe] == ch - 0x20)
          {
          members[probe] |= 0x120;
          merged = TRUE;
          break;
          }
      }

    if (merged)
      continue;

    if (count >= MAX_CLASS_CHARS_SIZE)
      return FALSE;

    members[count++] = (uint16_t) ch;
    }
  }

if (count == 0) return FALSE;  /* Should never occur, but stops analyzers complaining. */

folded = 0;
idx = 0;

/* Character zero cannot use the CMOV scheme because the moved value would be
zero as well, so it initializes TMP2 through the flags instead. */
if (members[0] != 0)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
else
  {
  idx = 1;
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
  }

/* Plain entries; merged ones are only counted here. */
for (; idx < count; idx++)
  {
  if ((members[idx] & 0x100) != 0)
    {
    folded++;
    continue;
    }

  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, members[idx]);
  CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
  }

if (folded != 0)
  {
  /* Merged entries are tested against TMP1 with bit 0x20 forced on. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);

  for (idx = 0; idx < count; idx++)
    {
    if ((members[idx] & 0x100) == 0)
      continue;

    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, members[idx] & 0xff);
    CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
    }
  }

/* invert toggles the sense selected by nclass. */
cond = ((nclass != FALSE) != (invert != FALSE)) ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
add_jump(compiler, backtracks, CMP(cond, TMP2, 0, SLJIT_IMM, 0));
return TRUE;
}
7117 
/* Emits an optimized test of TMP1 against a 256 bit character class when one
of the compact forms applies: the range based form is tried first and the
character list form is the fallback. Returns TRUE when one of them produced
the test, FALSE when neither did. May destroy TMP1. */
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
if (!optimize_class_ranges(common, bits, nclass, invert, backtracks))
  return optimize_class_chars(common, bits, nclass, invert, backtracks);
return TRUE;
}
7125 
/* Generates the fast-called helper that tests whether the character in TMP1
is a newline: 0x0a to 0x0d and 0x85, plus 0x2028 and 0x2029 when code units
are 16 or 32 bits wide or when an 8 bit pattern is in UTF mode. The result
is accumulated in TMP2 (non-zero on a hit) and the zero flag is set from the
last accumulation. TMP1 is modified; the helper returns through RETURN_ADDR. */
static void check_anynewline(compiler_common *common)
{
DEFINE_COMPILER;
/* Whether the characters above 0xff have to be tested as well. */
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
const BOOL test_wide = TRUE;
#elif defined SUPPORT_UNICODE
const BOOL test_wide = common->utf;
#else
const BOOL test_wide = FALSE;
#endif

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* 0x0a..0x0d as one unsigned range test on (TMP1 - 0x0a). */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
if (test_wide)
  {
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit lets one comparison cover 0x2028 and 0x2029. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
  }
/* Fold in the last pending comparison and publish the zero flag. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
7152 
/* Generates the fast-called helper that tests whether the character in TMP1
is horizontal white space: 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e,
0x2000 to 0x200a, 0x202f, 0x205f and 0x3000 when code units are 16 or 32 bits
wide or when an 8 bit pattern is in UTF mode. The result is accumulated in
TMP2 (non-zero on a hit) and the zero flag is set from the last accumulation.
TMP1 is modified on the wide path; the helper returns through RETURN_ADDR. */
static void check_hspace(compiler_common *common)
{
DEFINE_COMPILER;
/* Whether the characters above 0xff have to be tested as well. */
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
const BOOL test_wide = TRUE;
#elif defined SUPPORT_UNICODE
const BOOL test_wide = common->utf;
#else
const BOOL test_wide = FALSE;
#endif

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
if (test_wide)
  {
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* The remaining tests work on (TMP1 - 0x2000). */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
  }
/* Fold in the last pending comparison and publish the zero flag. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
7191 
/* Generates the fast-called helper that tests whether the character in TMP1
is vertical white space: 0x0a to 0x0d and 0x85, plus 0x2028 and 0x2029 when
code units are 16 or 32 bits wide or when an 8 bit pattern is in UTF mode.
The result is accumulated in TMP2 (non-zero on a hit) and the zero flag is
set from the last accumulation. TMP1 is modified; the helper returns through
RETURN_ADDR. */
static void check_vspace(compiler_common *common)
{
DEFINE_COMPILER;
/* Whether the characters above 0xff have to be tested as well. */
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
const BOOL test_wide = TRUE;
#elif defined SUPPORT_UNICODE
const BOOL test_wide = common->utf;
#else
const BOOL test_wide = FALSE;
#endif

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* 0x0a..0x0d as one unsigned range test on (TMP1 - 0x0a). */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
if (test_wide)
  {
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit lets one comparison cover 0x2028 and 0x2029. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
  }
/* Fold in the last pending comparison and publish the zero flag. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
7219 
/* Generates the fast-called helper that compares two code unit sequences
exactly. On entry TMP1 addresses the first sequence, STR_PTR addresses the end
of the second one and TMP2 holds the length in bytes: the emitted code first
subtracts TMP2 from STR_PTR and then compares pairs, advancing both pointers
and decreasing TMP2 by IN_UCHARS(1) per pair, until a pair differs or TMP2
reaches zero. The return address is parked in LOCALS0 and reloaded into TMP1
for the fast return.

The two characters live in TMP3 / RETURN_ADDR when TMP3 is a real register.
Otherwise STR_END / STACK_TOP are borrowed: their values are parked in TMP3
and RETURN_ADDR for the duration of the loop and restored before returning.
Post-update or pre-update addressing is used for the loads when
sljit_emit_mem() reports support for it. */
static void do_casefulcmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *mismatch;
struct sljit_label *next_unit;
int left_reg;
int right_reg;
/* 0: plain loads, 1: post-update loads, 2: pre-update loads. */
int addressing = 0;

if (sljit_get_register_index(TMP3) >= 0)
  {
  left_reg = TMP3;
  right_reg = RETURN_ADDR;
  }
else
  {
  left_reg = STR_END;
  right_reg = STACK_TOP;
  }

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

if (left_reg == STR_END)
  {
  /* Park the borrowed registers. */
  OP1(SLJIT_MOV, TMP3, 0, left_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, right_reg, 0);
  }

/* These two calls only query support (SLJIT_MEM_SUPP). */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, left_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  addressing = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, left_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  addressing = 2;

if (addressing == 2)
  {
  /* Pre-update loads advance first, so both pointers start one unit early. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

next_unit = LABEL();
if (addressing == 1)
  {
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, left_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, right_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  }
else if (addressing == 2)
  {
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, left_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, right_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  }
else
  {
  OP1(MOV_UCHAR, left_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, right_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

mismatch = CMP(SLJIT_NOT_EQUAL, left_reg, 0, right_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, next_unit);

JUMPHERE(mismatch);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

if (addressing == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (left_reg == STR_END)
  {
  /* Give the borrowed registers their values back. */
  OP1(SLJIT_MOV, left_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, right_reg, 0, RETURN_ADDR, 0);
  }

sljit_emit_fast_return(compiler, TMP1, 0);
}
7299 
/* Generates the fast-called helper that compares two code unit sequences
after mapping every code unit through the common->lcc table (for code units
wider than 8 bits, values above 255 are compared unmapped). On entry TMP1
addresses the first sequence, STR_PTR addresses the end of the second one and
TMP2 holds the length in bytes; the loop stops on the first differing pair or
when TMP2 reaches zero. The return address is parked in LOCALS0 and reloaded
into TMP1 for the fast return.

STR_END always holds the first character, so its value is saved in LOCALS1
and restored at the end. The second character and the table address use
RETURN_ADDR / TMP3 when TMP3 is a real register; otherwise STACK_TOP /
STACK_LIMIT are borrowed, parked in TMP3 and RETURN_ADDR, and restored before
returning. */
static void do_caselesscmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *mismatch;
#if PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *no_fold;
#endif
struct sljit_label *next_unit;
int left_reg = STR_END;
int right_reg;
int table_reg;
/* 0: plain loads, 1: post-update loads, 2: pre-update loads. */
int addressing = 0;

if (sljit_get_register_index(TMP3) >= 0)
  {
  right_reg = RETURN_ADDR;
  table_reg = TMP3;
  }
else
  {
  right_reg = STACK_TOP;
  table_reg = STACK_LIMIT;
  }

/* These two calls only query support (SLJIT_MEM_SUPP). */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, left_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  addressing = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, left_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  addressing = 2;

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, left_reg, 0);

if (right_reg == STACK_TOP)
  {
  /* Park the borrowed registers. */
  OP1(SLJIT_MOV, TMP3, 0, right_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, table_reg, 0);
  }

OP1(SLJIT_MOV, table_reg, 0, SLJIT_IMM, common->lcc);

switch (addressing)
  {
  case 1:
  next_unit = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, left_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, right_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  /* Pre-update loads advance first, so both pointers start one unit early. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  next_unit = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, left_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, right_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  /* STR_PTR is advanced later, after the table lookups. */
  next_unit = LABEL();
  OP1(MOV_UCHAR, left_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, right_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

/* Map both characters through the table; wide values above 255 are kept. */
#if PCRE2_CODE_UNIT_WIDTH != 8
no_fold = CMP(SLJIT_GREATER, left_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, left_reg, 0, SLJIT_MEM2(table_reg, left_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(no_fold);
no_fold = CMP(SLJIT_GREATER, right_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, right_reg, 0, SLJIT_MEM2(table_reg, right_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(no_fold);
#endif

if (addressing == 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

mismatch = CMP(SLJIT_NOT_EQUAL, left_reg, 0, right_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, next_unit);

JUMPHERE(mismatch);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

if (addressing == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (right_reg == STACK_TOP)
  {
  /* Give the borrowed registers their values back. */
  OP1(SLJIT_MOV, right_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, table_reg, 0, RETURN_ADDR, 0);
  }

OP1(SLJIT_MOV, left_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, TMP1, 0);
}
7397 
/* Emit the JIT comparison for one character of a literal character
sequence, taking the subject data from a negative offset relative to STR_PTR.

Arguments:
  common      the JIT compiler state
  caseless    TRUE if the character is matched case-insensitively
  cc          points at the first code unit of the pattern character
  context     running state of the sequence compare: the remaining length,
              the register that will receive the next load (sourcereg, -1
              before the first call), and the code units gathered so far
  backtracks  list that receives the jumps taken on a mismatch

Returns:      cc advanced past the code units that were consumed (one code
              unit, plus the extra units of the character when common->utf
              is set and HAS_EXTRALEN() reports them) */

static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
    compare_context *context, jump_list **backtracks)
{
DEFINE_COMPILER;
unsigned int casebit = 0;
PCRE2_SPTR caseunit = NULL;
#ifdef SUPPORT_UNICODE
int remaining;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  casebit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(casebit);
  /* Split the packed value: the upper bits select the code unit that
  differs between the two cases, the low byte is the differing bit. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  caseunit = cc + (casebit >> 8);
  casebit &= 0xff;
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for PCRE2_CODE_UNIT_WIDTH == 32. */
  caseunit = cc + (casebit >> 9);
  /* Bit 0x100 means that the differing bit lives in the high byte. */
  casebit = ((casebit & 0x100) != 0) ? ((casebit & 0xff) << 8) : (casebit & 0xff);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  }

/* First call for this sequence: emit the initial load into TMP1 and make
TMP2 the register that receives the following load. */
if (context->sourcereg == -1)
  {
#if PCRE2_CODE_UNIT_WIDTH == 8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UNICODE
/* Number of code units that belong to this character. */
remaining = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  remaining += GET_EXTRALEN(*cc);

do
  {
#endif

  context->length -= IN_UCHARS(1);
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)

  /* Unaligned reads are available: gather the code units (and their
  case masks) and compare a whole group with a single instruction. */
  if (casebit == 0 || caseunit != cc)
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc | casebit;
    context->oc.asuchars[context->ucharptr] = casebit;
    }
  context->ucharptr++;

  /* Flush the gathered group once it fills a load or the data runs out. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Start loading the next group into the current register ... */
    if (context->length >= 4)
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if PCRE2_CODE_UNIT_WIDTH == 8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */

    /* ... and switch to the other register, which holds the group that
    is compared now. */
    if (context->sourcereg == TMP1)
      context->sourcereg = TMP2;
    else
      context->sourcereg = TMP1;

    switch(context->ucharptr)
      {
      case 4 / sizeof(PCRE2_UCHAR):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(PCRE2_UCHAR):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#if PCRE2_CODE_UNIT_WIDTH == 8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    context->ucharptr = 0;
    }

#else

  /* No unaligned reads (or 32-bit code units): one compare per code unit. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  if (context->sourcereg == TMP1)
    context->sourcereg = TMP2;
  else
    context->sourcereg = TMP1;

  if (casebit == 0 || caseunit != cc)
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
  else
    {
    /* Force the differing bit on both sides so that either case matches. */
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, casebit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | casebit));
    }

#endif

  cc++;
#ifdef SUPPORT_UNICODE
  }
while (--remaining > 0);
#endif

return cc;
}
7550 
7551 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
7552 
/* Re-bias the runtime value held in typereg: emit the ADD or SUB that moves
it from the currently tracked bias (typeoffset) to the new bias (value), then
record the new bias. Nothing is emitted when the bias is unchanged. The macro
expects OP2, typereg and typeoffset to be in scope at the point of use, and it
evaluates its argument more than once. It is wrapped in do/while(0) so that it
behaves as a single statement, e.g. as the body of an unbraced if/else. */

#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)

/* The same operation for the character value held in TMP1, tracked by
charoffset (which must be in scope at the point of use). */

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
7572 
7573 static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
7574 
/* Emit the JIT code that matches one character against an extended class.

Arguments:
  common      the JIT compiler state
  cc          points at the class flag code unit (XCL_NOT, XCL_MAP and
              XCL_HASPROP bits). It is followed by the optional 32-byte
              bitmap (when XCL_MAP is set) and by the XCL_SINGLE, XCL_RANGE,
              XCL_PROP and XCL_NOTPROP items, terminated by XCL_END.
  backtracks  list that receives the jumps taken when the class rejects
              the character

Returns:      nothing

The code below expects read_char() to leave the character in TMP1. TMP2 is
scratch. RETURN_ADDR preserves the character while the bitmap or the Unicode
tables are consulted, and holds the character type when both the type and the
character are needed (typereg then names RETURN_ADDR). For a non-negated class
the jumps of accepted characters are collected in a local list that is bound
to the end of the emitted code; for a negated class they go to backtracks. */

static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
jump_list *found = NULL;
jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
struct sljit_jump *jump = NULL;
PCRE2_SPTR ccbegin;
int compares, invertcmp, numberofcmps;
#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
/* Not referenced directly below; presumably consumed by the GETCHARINCTEST
macro - confirm before removing. */
BOOL utf = common->utf;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */

#ifdef SUPPORT_UNICODE
BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
BOOL charsaved = FALSE;
int typereg = TMP1;
const sljit_u32 *other_cases;
sljit_uw typeoffset;
#endif /* SUPPORT_UNICODE */

/* Scanning the necessary info: count the items, find the smallest and the
largest character that can match, and note which data (character, type,
script) the property items require. */
cc++;
ccbegin = cc;
compares = 0;

if (cc[-1] & XCL_MAP)
  {
  min = 0;
  cc += 32 / sizeof(PCRE2_UCHAR);
  }

while (*cc != XCL_END)
  {
  compares++;
  if (*cc == XCL_SINGLE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
    if (c < min) min = c;
#ifdef SUPPORT_UNICODE
    needschar = TRUE;
#endif /* SUPPORT_UNICODE */
    }
  else if (*cc == XCL_RANGE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);
    if (c < min) min = c;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
#ifdef SUPPORT_UNICODE
    needschar = TRUE;
#endif /* SUPPORT_UNICODE */
    }
#ifdef SUPPORT_UNICODE
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    cc++;
    if (*cc == PT_CLIST)
      {
      other_cases = PRIV(ucd_caseless_sets) + cc[1];
      while (*other_cases != NOTACHAR)
        {
        if (*other_cases > max) max = *other_cases;
        if (*other_cases < min) min = *other_cases;
        other_cases++;
        }
      }
    else
      {
      /* Any other property can match anywhere in the character range. */
      max = READ_CHAR_MAX;
      min = 0;
      }

    switch(*cc)
      {
      case PT_ANY:
      /* A positive "any" item accepts every character, so the whole class
      reduces to reading one character; a negated one is ignored here. */
      if (cc[-1] == XCL_PROP)
        {
        compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
        if (list == backtracks)
          add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
        return;
        }
      break;

      case PT_LAMP:
      case PT_GC:
      case PT_PC:
      case PT_ALNUM:
      needstype = TRUE;
      break;

      case PT_SC:
      needsscript = TRUE;
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      case PT_WORD:
      case PT_PXGRAPH:
      case PT_PXPRINT:
      case PT_PXPUNCT:
      needstype = TRUE;
      needschar = TRUE;
      break;

      case PT_CLIST:
      case PT_UCNC:
      needschar = TRUE;
      break;

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    cc += 2;
    }
#endif /* SUPPORT_UNICODE */
  }
SLJIT_ASSERT(compares > 0);

/* We are not necessarily in UTF mode, even in 8-bit mode. */
cc = ccbegin;
if ((cc[-1] & XCL_NOT) != 0)
  read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
else
  read_char(common, min, max, NULL, 0);

if ((cc[-1] & XCL_HASPROP) == 0)
  {
  if ((cc[-1] & XCL_MAP) != 0)
    {
    /* Characters up to 255 are decided by the bitmap alone; only larger
    ones continue with the item list. */
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
    if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
      {
      /* Generic bitmap test: byte index is c >> 3, bit is 1 << (c & 7). */
      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
      OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
      add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
      }

    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(jump);

    cc += 32 / sizeof(PCRE2_UCHAR);
    }
  else
    {
    /* Quick rejection of characters outside [min, max]. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
    add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
    }
  }
else if ((cc[-1] & XCL_MAP) != 0)
  {
  /* The bitmap test destroys TMP1, so the character is kept in
  RETURN_ADDR and restored afterwards for the item list. */
  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
#ifdef SUPPORT_UNICODE
  charsaved = TRUE;
#endif /* SUPPORT_UNICODE */
  if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
    {
#if PCRE2_CODE_UNIT_WIDTH == 8
    jump = NULL;
    if (common->utf)
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
      jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);

    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
    add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));

#if PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf)
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
      JUMPHERE(jump);
    }

  OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
  cc += 32 / sizeof(PCRE2_UCHAR);
  }

#ifdef SUPPORT_UNICODE
if (needstype || needsscript)
  {
  if (needschar && !charsaved)
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
  if (!common->utf)
    {
    /* Values above the Unicode range are looked up as an unassigned
    character. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
    JUMPHERE(jump);
    }
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */

  /* Two-stage table lookup: afterwards TMP2 holds the index of the
  character's record in ucd_records. */
  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

  /* Turn the record index into a byte offset: (index << 2) + (index << 3),
  i.e. index * 12 (assumes sizeof(ucd_record) is 12 - confirm against the
  structure definition). This must be emitted exactly once: both the script
  and the type load below use the offset in TMP2, and scaling it a second
  time would address the wrong record when a class needs both. */
  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

  /* Before anything else, we deal with scripts. */
  if (needsscript)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 0);

    ccbegin = cc;

    /* Emit every script test now, while TMP1 holds the script; the other
    items are skipped here and handled by the main loop below. */
    while (*cc != XCL_END)
      {
      if (*cc == XCL_SINGLE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        }
      else if (*cc == XCL_RANGE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        GETCHARINCTEST(c, cc);
        }
      else
        {
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
        cc++;
        if (*cc == PT_SC)
          {
          compares--;
          invertcmp = (compares == 0 && list != backtracks);
          if (cc[-1] == XCL_NOTPROP)
            invertcmp ^= 0x1;
          jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
          add_jump(compiler, compares > 0 ? list : backtracks, jump);
          }
        cc += 2;
        }
      }

    cc = ccbegin;
    }

  /* TMP1 was used as scratch above; bring the character back. */
  if (needschar)
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);

  if (needstype)
    {
    if (!needschar)
      {
      /* Only the type is tested: it can live in TMP1. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 0);
      }
    else
      {
      /* TMP1 keeps the character, so the type goes into RETURN_ADDR. */
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
      typereg = RETURN_ADDR;
      }
    }
  }
#endif /* SUPPORT_UNICODE */

/* Generating code. charoffset and typeoffset track the bias currently
subtracted from TMP1 and typereg, so that a range test is one unsigned
compare. numberofcmps counts the results accumulated in TMP2 without a jump. */
charoffset = 0;
numberofcmps = 0;
#ifdef SUPPORT_UNICODE
typeoffset = 0;
#endif /* SUPPORT_UNICODE */

while (*cc != XCL_END)
  {
  compares--;
  /* The last item of a non-negated class jumps to backtracks on failure
  instead of jumping to found on success. */
  invertcmp = (compares == 0 && list != backtracks);
  jump = NULL;

  if (*cc == XCL_SINGLE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);

    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
  else if (*cc == XCL_RANGE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);
    SET_CHAR_OFFSET(c);
    GETCHARINCTEST(c, cc);

    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
#ifdef SUPPORT_UNICODE
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    if (*cc == XCL_NOTPROP)
      invertcmp ^= 0x1;
    cc++;
    switch(*cc)
      {
      case PT_ANY:
      if (!invertcmp)
        jump = JUMP(SLJIT_JUMP);
      break;

      case PT_LAMP:
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_GC:
      c = PRIV(ucp_typerange)[(int)cc[1] * 2];
      SET_TYPE_OFFSET(c);
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
      break;

      case PT_PC:
      jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
      break;

      case PT_SC:
      /* Script items were emitted (and counted) by the script loop above,
      so undo the decrement made at the top of this loop. */
      compares++;
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      SET_CHAR_OFFSET(9);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      SET_TYPE_OFFSET(ucp_Zl);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_WORD:
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      /* Fall through. */

      case PT_ALNUM:
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
      OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      SET_TYPE_OFFSET(ucp_Nd);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_CLIST:
      other_cases = PRIV(ucd_caseless_sets) + cc[1];

      /* At least three characters are required.
         Otherwise this case would be handled by the normal code path. */
      SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
      SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);

      /* Optimizing character pairs, if their difference is power of 2:
      OR-ing the differing bit into the (un-biased) character lets a single
      compare against the larger one accept both. */
      if (is_powerof2(other_cases[1] ^ other_cases[0]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
        else
          {
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
          }
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
        other_cases += 2;
        }
      else if (is_powerof2(other_cases[2] ^ other_cases[1]))
        {
        /* The pair is [1]/[2] here, so the mask must be their difference
        on both paths (with and without a character bias). */
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
        else
          {
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
          }
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);

        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
        OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);

        other_cases += 3;
        }
      else
        {
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
        }

      /* The remaining characters are tested one by one; only the last
      test needs to set the zero flag for the jump. */
      while (*other_cases != NOTACHAR)
        {
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
        }
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_UCNC:
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      SET_CHAR_OFFSET(0xa0);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      SET_CHAR_OFFSET(0);
      OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_PXGRAPH:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      SET_CHAR_OFFSET(0x2066);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      /* TMP2 is non-zero for the excluded characters, hence the test for
      zero. */
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPRINT:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      SET_CHAR_OFFSET(0x2066);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPUNCT:
      SET_TYPE_OFFSET(ucp_Sc);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      SET_CHAR_OFFSET(0);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);

      SET_TYPE_OFFSET(ucp_Pc);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    cc += 2;
    }
#endif /* SUPPORT_UNICODE */

  if (jump != NULL)
    add_jump(compiler, compares > 0 ? list : backtracks, jump);
  }

/* Accepted characters of a non-negated class continue here. */
if (found != NULL)
  set_jumps(found, LABEL());
}
8159 
8160 #undef SET_TYPE_OFFSET
8161 #undef SET_CHAR_OFFSET
8162 
8163 #endif
8164 
compile_simple_assertion_matchingpath(compiler_common * common,PCRE2_UCHAR type,PCRE2_SPTR cc,jump_list ** backtracks)8165 static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
8166 {
8167 DEFINE_COMPILER;
8168 int length;
8169 struct sljit_jump *jump[4];
8170 #ifdef SUPPORT_UNICODE
8171 struct sljit_label *label;
8172 #endif /* SUPPORT_UNICODE */
8173 
8174 switch(type)
8175   {
8176   case OP_SOD:
8177   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8178   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8179   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
8180   return cc;
8181 
8182   case OP_SOM:
8183   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8184   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
8185   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
8186   return cc;
8187 
8188   case OP_NOT_WORD_BOUNDARY:
8189   case OP_WORD_BOUNDARY:
8190   add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
8191 #ifdef SUPPORT_UNICODE
8192   if (common->invalid_utf)
8193     {
8194     OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_SIG_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
8195     add_jump(compiler, backtracks, JUMP(SLJIT_SIG_LESS));
8196     add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8197     return cc;
8198     }
8199 #endif /* SUPPORT_UNICODE */
8200   sljit_set_current_flags(compiler, SLJIT_SET_Z);
8201   add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8202   return cc;
8203 
8204   case OP_EODN:
8205   /* Requires rather complex checks. */
8206   jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
8207   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8208     {
8209     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8210     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8211     if (common->mode == PCRE2_JIT_COMPLETE)
8212       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
8213     else
8214       {
8215       jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
8216       OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
8217       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
8218       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
8219       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
8220       add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
8221       check_partial(common, TRUE);
8222       add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8223       JUMPHERE(jump[1]);
8224       }
8225     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8226     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8227     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8228     }
8229   else if (common->nltype == NLTYPE_FIXED)
8230     {
8231     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8232     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8233     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
8234     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
8235     }
8236   else
8237     {
8238     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8239     jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
8240     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8241     OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
8242     jump[2] = JUMP(SLJIT_GREATER);
8243     add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
8244     /* Equal. */
8245     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8246     jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
8247     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8248 
8249     JUMPHERE(jump[1]);
8250     if (common->nltype == NLTYPE_ANYCRLF)
8251       {
8252       OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8253       add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
8254       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
8255       }
8256     else
8257       {
8258       OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
8259       read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
8260       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
8261       add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
8262       sljit_set_current_flags(compiler, SLJIT_SET_Z);
8263       add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
8264       OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
8265       }
8266     JUMPHERE(jump[2]);
8267     JUMPHERE(jump[3]);
8268     }
8269   JUMPHERE(jump[0]);
8270   check_partial(common, FALSE);
8271   return cc;
8272 
8273   case OP_EOD:
8274   add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
8275   check_partial(common, FALSE);
8276   return cc;
8277 
8278   case OP_DOLL:
8279   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8280   OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8281   add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
8282 
8283   if (!common->endonly)
8284     compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
8285   else
8286     {
8287     add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
8288     check_partial(common, FALSE);
8289     }
8290   return cc;
8291 
8292   case OP_DOLLM:
8293   jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
8294   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8295   OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8296   add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
8297   check_partial(common, FALSE);
8298   jump[0] = JUMP(SLJIT_JUMP);
8299   JUMPHERE(jump[1]);
8300 
8301   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8302     {
8303     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8304     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8305     if (common->mode == PCRE2_JIT_COMPLETE)
8306       add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
8307     else
8308       {
8309       jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
8310       /* STR_PTR = STR_END - IN_UCHARS(1) */
8311       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8312       check_partial(common, TRUE);
8313       add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8314       JUMPHERE(jump[1]);
8315       }
8316 
8317     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8318     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8319     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8320     }
8321   else
8322     {
8323     peek_char(common, common->nlmax, TMP3, 0, NULL);
8324     check_newlinechar(common, common->nltype, backtracks, FALSE);
8325     }
8326   JUMPHERE(jump[0]);
8327   return cc;
8328 
8329   case OP_CIRC:
8330   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8331   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
8332   add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
8333   OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8334   add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
8335   return cc;
8336 
8337   case OP_CIRCM:
8338   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8339   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8340   jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
8341   OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8342   add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
8343   jump[0] = JUMP(SLJIT_JUMP);
8344   JUMPHERE(jump[1]);
8345 
8346   if (!common->alt_circumflex)
8347     add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8348 
8349   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8350     {
8351     OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8352     add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
8353     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
8354     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
8355     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8356     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8357     }
8358   else
8359     {
8360     peek_char_back(common, common->nlmax, backtracks);
8361     check_newlinechar(common, common->nltype, backtracks, FALSE);
8362     }
8363   JUMPHERE(jump[0]);
8364   return cc;
8365 
8366   case OP_REVERSE:
8367   length = GET(cc, 0);
8368   if (length == 0)
8369     return cc + LINK_SIZE;
8370   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8371 #ifdef SUPPORT_UNICODE
8372   if (common->utf)
8373     {
8374     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8375     OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, length);
8376     label = LABEL();
8377     add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
8378     move_back(common, backtracks, FALSE);
8379     OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
8380     JUMPTO(SLJIT_NOT_ZERO, label);
8381     }
8382   else
8383 #endif
8384     {
8385     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8386     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
8387     add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
8388     }
8389   check_start_used_ptr(common);
8390   return cc + LINK_SIZE;
8391   }
8392 SLJIT_UNREACHABLE();
8393 return cc;
8394 }
8395 
8396 #ifdef SUPPORT_UNICODE
8397 
8398 #if PCRE2_CODE_UNIT_WIDTH != 32
8399 
do_extuni_utf(jit_arguments * args,PCRE2_SPTR cc)8400 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
8401 {
8402 PCRE2_SPTR start_subject = args->begin;
8403 PCRE2_SPTR end_subject = args->end;
8404 int lgb, rgb, ricount;
8405 PCRE2_SPTR prevcc, startcc, bptr;
8406 BOOL first = TRUE;
8407 uint32_t c;
8408 
8409 prevcc = cc;
8410 startcc = NULL;
8411 do
8412   {
8413   GETCHARINC(c, cc);
8414   rgb = UCD_GRAPHBREAK(c);
8415 
8416   if (first)
8417     {
8418     lgb = rgb;
8419     startcc = cc;
8420     first = FALSE;
8421     continue;
8422     }
8423 
8424   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8425     break;
8426 
8427   /* Not breaking between Regional Indicators is allowed only if there
8428   are an even number of preceding RIs. */
8429 
8430   if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8431     {
8432     ricount = 0;
8433     bptr = prevcc;
8434 
8435     /* bptr is pointing to the left-hand character */
8436     while (bptr > start_subject)
8437       {
8438       bptr--;
8439       BACKCHAR(bptr);
8440       GETCHAR(c, bptr);
8441 
8442       if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
8443         break;
8444 
8445       ricount++;
8446       }
8447 
8448     if ((ricount & 1) != 0) break;  /* Grapheme break required */
8449     }
8450 
8451   /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8452   allows any number of them before a following Extended_Pictographic. */
8453 
8454   if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8455        lgb != ucp_gbExtended_Pictographic)
8456     lgb = rgb;
8457 
8458   prevcc = startcc;
8459   startcc = cc;
8460   }
8461 while (cc < end_subject);
8462 
8463 return startcc;
8464 }
8465 
do_extuni_utf_invalid(jit_arguments * args,PCRE2_SPTR cc)8466 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
8467 {
8468 PCRE2_SPTR start_subject = args->begin;
8469 PCRE2_SPTR end_subject = args->end;
8470 int lgb, rgb, ricount;
8471 PCRE2_SPTR prevcc, startcc, bptr;
8472 BOOL first = TRUE;
8473 uint32_t c;
8474 
8475 prevcc = cc;
8476 startcc = NULL;
8477 do
8478   {
8479   GETCHARINC_INVALID(c, cc, end_subject, break);
8480   rgb = UCD_GRAPHBREAK(c);
8481 
8482   if (first)
8483     {
8484     lgb = rgb;
8485     startcc = cc;
8486     first = FALSE;
8487     continue;
8488     }
8489 
8490   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8491     break;
8492 
8493   /* Not breaking between Regional Indicators is allowed only if there
8494   are an even number of preceding RIs. */
8495 
8496   if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8497     {
8498     ricount = 0;
8499     bptr = prevcc;
8500 
8501     /* bptr is pointing to the left-hand character */
8502     while (bptr > start_subject)
8503       {
8504       GETCHARBACK_INVALID(c, bptr, start_subject, break);
8505 
8506       if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
8507         break;
8508 
8509       ricount++;
8510       }
8511 
8512     if ((ricount & 1) != 0)
8513       break;  /* Grapheme break required */
8514     }
8515 
8516   /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8517   allows any number of them before a following Extended_Pictographic. */
8518 
8519   if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8520        lgb != ucp_gbExtended_Pictographic)
8521     lgb = rgb;
8522 
8523   prevcc = startcc;
8524   startcc = cc;
8525   }
8526 while (cc < end_subject);
8527 
8528 return startcc;
8529 }
8530 
8531 #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
8532 
do_extuni_no_utf(jit_arguments * args,PCRE2_SPTR cc)8533 static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
8534 {
8535 PCRE2_SPTR start_subject = args->begin;
8536 PCRE2_SPTR end_subject = args->end;
8537 int lgb, rgb, ricount;
8538 PCRE2_SPTR bptr;
8539 uint32_t c;
8540 
8541 GETCHARINC(c, cc);
8542 #if PCRE2_CODE_UNIT_WIDTH == 32
8543 if (c >= 0x110000)
8544   return NULL;
8545 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8546 lgb = UCD_GRAPHBREAK(c);
8547 
8548 while (cc < end_subject)
8549   {
8550   c = *cc;
8551 #if PCRE2_CODE_UNIT_WIDTH == 32
8552   if (c >= 0x110000)
8553     break;
8554 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8555   rgb = UCD_GRAPHBREAK(c);
8556 
8557   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8558     break;
8559 
8560   /* Not breaking between Regional Indicators is allowed only if there
8561   are an even number of preceding RIs. */
8562 
8563   if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8564     {
8565     ricount = 0;
8566     bptr = cc - 1;
8567 
8568     /* bptr is pointing to the left-hand character */
8569     while (bptr > start_subject)
8570       {
8571       bptr--;
8572       c = *bptr;
8573 #if PCRE2_CODE_UNIT_WIDTH == 32
8574       if (c >= 0x110000)
8575         break;
8576 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8577 
8578       if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
8579 
8580       ricount++;
8581       }
8582 
8583     if ((ricount & 1) != 0)
8584       break;  /* Grapheme break required */
8585     }
8586 
8587   /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8588   allows any number of them before a following Extended_Pictographic. */
8589 
8590   if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8591        lgb != ucp_gbExtended_Pictographic)
8592     lgb = rgb;
8593 
8594   cc++;
8595   }
8596 
8597 return cc;
8598 }
8599 
8600 #endif /* SUPPORT_UNICODE */
8601 
/* Emits the matching-path code for one opcode that consumes a single
character: character types, OP_ANY / OP_ALLANY / OP_ANYBYTE, Unicode
properties, newline and space classes, OP_EXTUNI, literal characters, negated
characters and character classes.

Arguments:
  common         compiler state
  type           the opcode to compile
  cc             points at the data that follows the opcode
  backtracks     jump list that receives every "no match" jump
  check_str_ptr  when TRUE an end-of-subject check is emitted before the
                 character is consumed

Returns a pointer just past the opcode data that was processed. */

compile_char1_matchingpath(compiler_common * common,PCRE2_UCHAR type,PCRE2_SPTR cc,jump_list ** backtracks,BOOL check_str_ptr)8602 static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
8603 {
8604 DEFINE_COMPILER;
8605 int length;
8606 unsigned int c, oc, bit;
8607 compare_context context;
8608 struct sljit_jump *jump[3];
8609 jump_list *end_list;
8610 #ifdef SUPPORT_UNICODE
8611 PCRE2_UCHAR propdata[5];
8612 #endif /* SUPPORT_UNICODE */
8613 
8614 switch(type)
8615   {
8616   case OP_NOT_DIGIT:
8617   case OP_DIGIT:
8618   /* Digits are usually 0-9, so it is worth to optimize them. */
8619   if (check_str_ptr)
8620     detect_partial_match(common, backtracks);
8621 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8622   if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
8623     read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
8624   else
8625 #endif
8626     read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
8627     /* Flip the starting bit in the negative case. */
8628   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
8629   add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8630   return cc;
8631 
8632   case OP_NOT_WHITESPACE:
8633   case OP_WHITESPACE:
8634   if (check_str_ptr)
8635     detect_partial_match(common, backtracks);
8636 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8637   if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
8638     read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
8639   else
8640 #endif
8641     read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
8642   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
8643   add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8644   return cc;
8645 
8646   case OP_NOT_WORDCHAR:
8647   case OP_WORDCHAR:
8648   if (check_str_ptr)
8649     detect_partial_match(common, backtracks);
8650 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8651   if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
8652     read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
8653   else
8654 #endif
8655     read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
8656   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
8657   add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8658   return cc;
8659 
8660   case OP_ANY:
8661   if (check_str_ptr)
8662     detect_partial_match(common, backtracks);
8663   read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
8664   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8665     {
      /* Two-unit fixed newline: fail only when the character just read is the
      first newline unit and the next one in the subject is the second. */
8666     jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
8667     end_list = NULL;
8668     if (common->mode != PCRE2_JIT_PARTIAL_HARD)
8669       add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8670     else
8671       check_str_end(common, &end_list);
8672 
8673     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8674     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
8675     set_jumps(end_list, LABEL());
8676     JUMPHERE(jump[0]);
8677     }
8678   else
8679     check_newlinechar(common, common->nltype, backtracks, TRUE);
8680   return cc;
8681 
8682   case OP_ALLANY:
8683   if (check_str_ptr)
8684     detect_partial_match(common, backtracks);
8685 #ifdef SUPPORT_UNICODE
8686   if (common->utf)
8687     {
8688     if (common->invalid_utf)
8689       {
8690       read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
8691       return cc;
8692       }
8693 
8694 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
      /* Skip one code unit, then add the number of extra code units that the
      first unit announces (table lookup for 8-bit, surrogate test for
      16-bit). */
8695     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8696     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8697 #if PCRE2_CODE_UNIT_WIDTH == 8
8698     jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
8699     OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
8700     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
8701 #elif PCRE2_CODE_UNIT_WIDTH == 16
8702     jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
8703     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
8704     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
8705     OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
8706     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
8707     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
8708 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
8709     JUMPHERE(jump[0]);
8710     return cc;
8711 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
8712     }
8713 #endif /* SUPPORT_UNICODE */
8714   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8715   return cc;
8716 
8717   case OP_ANYBYTE:
8718   if (check_str_ptr)
8719     detect_partial_match(common, backtracks);
8720   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8721   return cc;
8722 
8723 #ifdef SUPPORT_UNICODE
8724   case OP_NOTPROP:
8725   case OP_PROP:
    /* A property test is compiled as a one-item extended class built on the
    stack from the two data units that follow the opcode. */
8726   propdata[0] = XCL_HASPROP;
8727   propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
8728   propdata[2] = cc[0];
8729   propdata[3] = cc[1];
8730   propdata[4] = XCL_END;
8731   if (check_str_ptr)
8732     detect_partial_match(common, backtracks);
8733   compile_xclass_matchingpath(common, propdata, backtracks);
8734   return cc + 2;
8735 #endif
8736 
8737   case OP_ANYNL:
8738   if (check_str_ptr)
8739     detect_partial_match(common, backtracks);
8740   read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
    /* After a CR, a directly following NL is consumed as well; any other
    first character goes through the generic newline check. */
8741   jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
8742   /* We don't need to handle soft partial matching case. */
8743   end_list = NULL;
8744   if (common->mode != PCRE2_JIT_PARTIAL_HARD)
8745     add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8746   else
8747     check_str_end(common, &end_list);
8748   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8749   jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
8750   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8751   jump[2] = JUMP(SLJIT_JUMP);
8752   JUMPHERE(jump[0]);
8753   check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
8754   set_jumps(end_list, LABEL());
8755   JUMPHERE(jump[1]);
8756   JUMPHERE(jump[2]);
8757   return cc;
8758 
8759   case OP_NOT_HSPACE:
8760   case OP_HSPACE:
8761   if (check_str_ptr)
8762     detect_partial_match(common, backtracks);
8763 
8764   if (type == OP_NOT_HSPACE)
8765     read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
8766   else
8767     read_char(common, 0x9, 0x3000, NULL, 0);
8768 
    /* The shared hspace routine reports its result in the zero flag. */
8769   add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
8770   sljit_set_current_flags(compiler, SLJIT_SET_Z);
8771   add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8772   return cc;
8773 
8774   case OP_NOT_VSPACE:
8775   case OP_VSPACE:
8776   if (check_str_ptr)
8777     detect_partial_match(common, backtracks);
8778 
8779   if (type == OP_NOT_VSPACE)
8780     read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
8781   else
8782     read_char(common, 0xa, 0x2029, NULL, 0);
8783 
    /* The shared vspace routine reports its result in the zero flag. */
8784   add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
8785   sljit_set_current_flags(compiler, SLJIT_SET_Z);
8786   add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8787   return cc;
8788 
8789 #ifdef SUPPORT_UNICODE
8790   case OP_EXTUNI:
8791   if (check_str_ptr)
8792     detect_partial_match(common, backtracks);
8793 
    /* The cluster is scanned by a C helper: ARGUMENTS is passed in R0 and
    STR_PTR (asserted to be R1) is the second argument. The returned pointer
    becomes the new STR_PTR; where a zero return is possible it backtracks. */
8794   SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
8795   OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
8796 
8797 #if PCRE2_CODE_UNIT_WIDTH != 32
8798   sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
8799     common->utf ? (common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_utf)) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
8800   if (common->invalid_utf)
8801     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
8802 #else
8803   sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_extuni_no_utf));
8804   add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
8805 #endif
8806 
8807   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
8808 
8809   if (common->mode == PCRE2_JIT_PARTIAL_HARD)
8810     {
8811     jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
8812     /* Since we successfully read a char above, partial matching must occur. */
8813     check_partial(common, TRUE);
8814     JUMPHERE(jump[0]);
8815     }
8816   return cc;
8817 #endif
8818 
8819   case OP_CHAR:
8820   case OP_CHARI:
8821   length = 1;
8822 #ifdef SUPPORT_UNICODE
8823   if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
8824 #endif
8825 
8826   if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
8827     detect_partial_match(common, backtracks);
8828 
    /* Case-sensitive characters, and caseless ones whose two cases differ in
    a single bit (or have no other case), are handled by the sequence
    comparer. */
8829   if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
8830     {
8831     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
8832     if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
8833       add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
8834 
8835     context.length = IN_UCHARS(length);
8836     context.sourcereg = -1;
8837 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
8838     context.ucharptr = 0;
8839 #endif
8840     return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
8841     }
8842 
    /* Remaining case: a caseless character whose other case cannot be reached
    by setting one bit. Read the character and compare it with both cases. */
8843 #ifdef SUPPORT_UNICODE
8844   if (common->utf)
8845     {
8846     GETCHAR(c, cc);
8847     }
8848   else
8849 #endif
8850     c = *cc;
8851 
8852   SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));
8853 
8854   if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
8855     add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8856 
8857   oc = char_othercase(common, c);
8858   read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);
8859 
8860   SLJIT_ASSERT(!is_powerof2(c ^ oc));
8861 
8862   if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
8863     {
      /* Map oc onto c with a conditional move so one compare is enough. */
8864     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
8865     CMOV(SLJIT_EQUAL, TMP1, SLJIT_IMM, c);
8866     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8867     }
8868   else
8869     {
8870     jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
8871     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
8872     JUMPHERE(jump[0]);
8873     }
8874   return cc + length;
8875 
8876   case OP_NOT:
8877   case OP_NOTI:
8878   if (check_str_ptr)
8879     detect_partial_match(common, backtracks);
8880 
8881   length = 1;
8882 #ifdef SUPPORT_UNICODE
8883   if (common->utf)
8884     {
8885 #if PCRE2_CODE_UNIT_WIDTH == 8
8886     c = *cc;
8887     if (c < 128 && !common->invalid_utf)
8888       {
        /* Excluded character is a single unit: test only the first subject
        unit, then step over the whole subject character. */
8889       OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8890       if (type == OP_NOT || !char_has_othercase(common, cc))
8891         add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8892       else
8893         {
8894         /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
8895         OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
8896         add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
8897         }
8898       /* Skip the variable-length character. */
8899       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8900       jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
8901       OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
8902       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
8903       JUMPHERE(jump[0]);
8904       return cc + 1;
8905       }
8906     else
8907 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
8908       {
8909       GETCHARLEN(c, cc, length);
8910       }
8911     }
8912   else
8913 #endif /* SUPPORT_UNICODE */
8914     c = *cc;
8915 
8916   if (type == OP_NOT || !char_has_othercase(common, cc))
8917     {
8918     read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
8919     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8920     }
8921   else
8922     {
8923     oc = char_othercase(common, c);
8924     read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
8925     bit = c ^ oc;
8926     if (is_powerof2(bit))
8927       {
        /* The cases differ in one bit: set it and compare once. */
8928       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
8929       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
8930       }
8931     else
8932       {
8933       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8934       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
8935       }
8936     }
8937   return cc + length;
8938 
8939   case OP_CLASS:
8940   case OP_NCLASS:
8941   if (check_str_ptr)
8942     detect_partial_match(common, backtracks);
8943 
8944 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8945   bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
8946   if (type == OP_NCLASS)
8947     read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
8948   else
8949     read_char(common, 0, bit, NULL, 0);
8950 #else
8951   if (type == OP_NCLASS)
8952     read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
8953   else
8954     read_char(common, 0, 255, NULL, 0);
8955 #endif
8956 
8957   if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
8958     return cc + 32 / sizeof(PCRE2_UCHAR);
8959 
    /* Characters above the bitmap range fail for OP_CLASS; for OP_NCLASS
    jump[0] skips the bitmap test for them. */
8960 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8961   jump[0] = NULL;
8962   if (common->utf)
8963     {
8964     jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
8965     if (type == OP_CLASS)
8966       {
8967       add_jump(compiler, backtracks, jump[0]);
8968       jump[0] = NULL;
8969       }
8970     }
8971 #elif PCRE2_CODE_UNIT_WIDTH != 8
8972   jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
8973   if (type == OP_CLASS)
8974     {
8975     add_jump(compiler, backtracks, jump[0]);
8976     jump[0] = NULL;
8977     }
8978 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
8979 
    /* Bitmap lookup: TMP1 = map[c >> 3], TMP2 = 1 << (c & 7); backtrack when
    the bit is clear. */
8980   OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
8981   OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
8982   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
8983   OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
8984   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
8985   add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
8986 
8987 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
8988   if (jump[0] != NULL)
8989     JUMPHERE(jump[0]);
8990 #endif
8991   return cc + 32 / sizeof(PCRE2_UCHAR);
8992 
8993 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
8994   case OP_XCLASS:
8995   if (check_str_ptr)
8996     detect_partial_match(common, backtracks);
8997   compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
8998   return cc + GET(cc, 0) - 1;
8999 #endif
9000   }
9001 SLJIT_UNREACHABLE();
9002 return cc;
9003 }
9004 
compile_charn_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,jump_list ** backtracks)9005 static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
9006 {
9007 /* This function consumes at least one input character. */
9008 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
9009 DEFINE_COMPILER;
9010 PCRE2_SPTR ccbegin = cc;
9011 compare_context context;
9012 int size;
9013 
9014 context.length = 0;
9015 do
9016   {
9017   if (cc >= ccend)
9018     break;
9019 
9020   if (*cc == OP_CHAR)
9021     {
9022     size = 1;
9023 #ifdef SUPPORT_UNICODE
9024     if (common->utf && HAS_EXTRALEN(cc[1]))
9025       size += GET_EXTRALEN(cc[1]);
9026 #endif
9027     }
9028   else if (*cc == OP_CHARI)
9029     {
9030     size = 1;
9031 #ifdef SUPPORT_UNICODE
9032     if (common->utf)
9033       {
9034       if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9035         size = 0;
9036       else if (HAS_EXTRALEN(cc[1]))
9037         size += GET_EXTRALEN(cc[1]);
9038       }
9039     else
9040 #endif
9041     if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9042       size = 0;
9043     }
9044   else
9045     size = 0;
9046 
9047   cc += 1 + size;
9048   context.length += IN_UCHARS(size);
9049   }
9050 while (size > 0 && context.length <= 128);
9051 
9052 cc = ccbegin;
9053 if (context.length > 0)
9054   {
9055   /* We have a fixed-length byte sequence. */
9056   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
9057   add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
9058 
9059   context.sourcereg = -1;
9060 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
9061   context.ucharptr = 0;
9062 #endif
9063   do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
9064   return cc;
9065   }
9066 
9067 /* A non-fixed length character will be checked if length == 0. */
9068 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
9069 }
9070 
9071 /* Forward definitions. */
9072 static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
9073 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
9074 
9075 #define PUSH_BACKTRACK(size, ccstart, error) \
9076   do \
9077     { \
9078     backtrack = sljit_alloc_memory(compiler, (size)); \
9079     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
9080       return error; \
9081     memset(backtrack, 0, size); \
9082     backtrack->prev = parent->top; \
9083     backtrack->cc = (ccstart); \
9084     parent->top = backtrack; \
9085     } \
9086   while (0)
9087 
/* Variant of the backtrack-record push for functions returning void: creates
a record of `size` bytes with sljit_alloc_memory(), clears it, and makes it
the new top of `parent`'s backtrack list with `ccstart` as its pattern
position. The locals `backtrack`, `compiler` and `parent` must be in scope at
the expansion site. If the compiler reports an error after the allocation, the
enclosing function simply returns. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
9100 
/* Views the local `backtrack` pointer as a pointer to the more specific
record type `type`. */
#define BACKTRACK_AS(type) ((type *)(backtrack))
9102 
/* Emits the code that selects the capture pair used by a duplicate-name
reference (OP_DNREF / OP_DNREFI). The candidates are the consecutive name
table entries described by the opcode's two 16-bit arguments (first entry
index and entry count). On exit from the generated code TMP2 holds the address
of the selected OVECTOR pair and TMP1 holds the value of OVECTOR(1).

  common      shared compiler state
  cc          points at the OP_DNREF / OP_DNREFI opcode
  backtracks  when not NULL (and common->unset_backref is not set), receives a
              jump taken if the last candidate also equals OVECTOR(1)
*/
static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
/* Every candidate except the last one is tested inside the loop. */
int remaining = GET2(cc, 1 + IMM2_SIZE) - 1;
PCRE2_SPTR entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
unsigned int ovec_index;
jump_list *selected = NULL;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

/* Reference value every candidate's start slot is compared with. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

for (; remaining > 0; remaining--)
  {
  ovec_index = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovec_index));
  /* Stop searching as soon as a candidate differs from the reference value. */
  add_jump(compiler, &selected, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index), TMP1, 0));
  entry += common->name_entry_size;
  }

/* The last candidate is selected unconditionally; optionally fail if it
equals the reference value as well. */
ovec_index = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovec_index));
if (backtracks != NULL && !common->unset_backref)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index), TMP1, 0));

set_jumps(selected, LABEL());
}
9132 
/* Emits the code that matches the text captured by a back reference at the
current subject position.

  common      shared compiler state
  cc          points at the reference opcode (OP_REF, OP_REFI, OP_DNREF or
              OP_DNREFI)
  backtracks  jump list that receives every failure jump
  withchecks  when TRUE, also emit the unset-group check (numbered references
              only, unless common->unset_backref is set) and the test for an
              empty captured string
  emptyfail   when TRUE together with withchecks, an empty captured string
              jumps to backtracks instead of matching the empty string

For numbered references the capture pair is read from OVECTOR. For
duplicate-name references TMP2 must already hold the address of the capture
pair to use when the generated code is entered. */
static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
/* Both caseful opcodes must call the caseful helper. Testing for OP_REF alone
would route the caseful duplicate-name reference (OP_DNREF) to the caseless
comparison. */
jump_list **cmp_calls = (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp;
int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;
#if defined SUPPORT_UNICODE
struct sljit_label *loop;
struct sljit_label *caseless_loop;
jump_list *no_match = NULL;
int source_reg = COUNT_MATCH;
int source_end_reg = ARGUMENTS;
int char1_reg = STACK_LIMIT;
#endif /* SUPPORT_UNICODE */

/* TMP1 = start of the captured text. */
if (ref)
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->unset_backref)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
else
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UNICODE
/* NOTE(review): only OP_REFI takes the character-by-character UTF path;
OP_DNREFI in UTF mode uses the generic caseless helper below. */
if (common->utf && *cc == OP_REFI)
  {
  SLJIT_ASSERT(common->iref_ptr != 0);

  /* TMP2 = end of the captured text. */
  if (ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  if (withchecks && emptyfail)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));

  /* Three extra registers are borrowed for the loop. Their current values are
  saved in the iref_ptr slots and restored on every exit below. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0);

  OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
  OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);

  loop = LABEL();
  /* Captured text exhausted: success. Subject exhausted first: partial/fail. */
  jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
  partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

  /* Read original character. It must be a valid UTF character. STR_PTR is
  temporarily pointed at the captured text and put back afterwards. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);

  read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);

  OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);

  /* Read second character. */
  read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);

  /* Identical characters: continue with the next pair. */
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

  /* Keep the subject character in TMP3 across the property lookup. */
  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);

  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));

  /* TMP2 = ucd_records + TMP2 * 12 (computed as (TMP2 << 3) + (TMP2 << 2)). */
  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));

  /* TMP1 = subject character + other_case delta; TMP2 = caseset index. */
  OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

  /* A zero caseset index means there is no caseless set to search. */
  add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));

  /* Scan the 32-bit set entries: an entry equal to the captured character is
  a match, a smaller entry continues the scan, anything else is a mismatch. */
  caseless_loop = LABEL();
  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
  OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, char1_reg, 0);
  JUMPTO(SLJIT_EQUAL, loop);
  JUMPTO(SLJIT_LESS, caseless_loop);

  /* Mismatch exit. In complete mode running out of subject is a plain failure
  as well. */
  set_jumps(no_match, LABEL());
  if (common->mode == PCRE2_JIT_COMPLETE)
    JUMPHERE(partial);

  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

  /* Partial matching modes: end of subject goes through check_partial(). */
  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    JUMPHERE(partial);
    OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
    OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);

    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    }

  /* Success exit: the whole captured text has been consumed. */
  JUMPHERE(jump);
  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  return;
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* TMP2 = length of the captured text (end - start); sets the zero flag. */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  if (withchecks)
    jump = JUMP(SLJIT_ZERO);

  /* Advance STR_PTR over the candidate text before comparing. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, partial);

  /* The comparison helper leaves a non-zero TMP2 on mismatch. */
  add_jump(compiler, cmp_calls, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    nopartial = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* TMP2 -= STR_END - STR_PTR */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    /* Compare only the part of the reference that fits before STR_END. */
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, cmp_calls, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }

/* Empty captured string: either a failure or a successful empty match. */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
}
9302 
compile_ref_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9303 static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9304 {
9305 DEFINE_COMPILER;
9306 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9307 backtrack_common *backtrack;
9308 PCRE2_UCHAR type;
9309 int offset = 0;
9310 struct sljit_label *label;
9311 struct sljit_jump *zerolength;
9312 struct sljit_jump *jump = NULL;
9313 PCRE2_SPTR ccbegin = cc;
9314 int min = 0, max = 0;
9315 BOOL minimize;
9316 
9317 PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
9318 
9319 if (ref)
9320   offset = GET2(cc, 1) << 1;
9321 else
9322   cc += IMM2_SIZE;
9323 type = cc[1 + IMM2_SIZE];
9324 
9325 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
9326 minimize = (type & 0x1) != 0;
9327 switch(type)
9328   {
9329   case OP_CRSTAR:
9330   case OP_CRMINSTAR:
9331   min = 0;
9332   max = 0;
9333   cc += 1 + IMM2_SIZE + 1;
9334   break;
9335   case OP_CRPLUS:
9336   case OP_CRMINPLUS:
9337   min = 1;
9338   max = 0;
9339   cc += 1 + IMM2_SIZE + 1;
9340   break;
9341   case OP_CRQUERY:
9342   case OP_CRMINQUERY:
9343   min = 0;
9344   max = 1;
9345   cc += 1 + IMM2_SIZE + 1;
9346   break;
9347   case OP_CRRANGE:
9348   case OP_CRMINRANGE:
9349   min = GET2(cc, 1 + IMM2_SIZE + 1);
9350   max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
9351   cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
9352   break;
9353   default:
9354   SLJIT_UNREACHABLE();
9355   break;
9356   }
9357 
9358 if (!minimize)
9359   {
9360   if (min == 0)
9361     {
9362     allocate_stack(common, 2);
9363     if (ref)
9364       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9365     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9366     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9367     /* Temporary release of STR_PTR. */
9368     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9369     /* Handles both invalid and empty cases. Since the minimum repeat,
9370     is zero the invalid case is basically the same as an empty case. */
9371     if (ref)
9372       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9373     else
9374       {
9375       compile_dnref_search(common, ccbegin, NULL);
9376       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9377       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9378       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9379       }
9380     /* Restore if not zero length. */
9381     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9382     }
9383   else
9384     {
9385     allocate_stack(common, 1);
9386     if (ref)
9387       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9388     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9389     if (ref)
9390       {
9391       add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9392       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9393       }
9394     else
9395       {
9396       compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
9397       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9398       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9399       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9400       }
9401     }
9402 
9403   if (min > 1 || max > 1)
9404     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
9405 
9406   label = LABEL();
9407   if (!ref)
9408     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
9409   compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
9410 
9411   if (min > 1 || max > 1)
9412     {
9413     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
9414     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9415     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
9416     if (min > 1)
9417       CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
9418     if (max > 1)
9419       {
9420       jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
9421       allocate_stack(common, 1);
9422       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9423       JUMPTO(SLJIT_JUMP, label);
9424       JUMPHERE(jump);
9425       }
9426     }
9427 
9428   if (max == 0)
9429     {
9430     /* Includes min > 1 case as well. */
9431     allocate_stack(common, 1);
9432     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9433     JUMPTO(SLJIT_JUMP, label);
9434     }
9435 
9436   JUMPHERE(zerolength);
9437   BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9438 
9439   count_match(common);
9440   return cc;
9441   }
9442 
9443 allocate_stack(common, ref ? 2 : 3);
9444 if (ref)
9445   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9446 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9447 if (type != OP_CRMINSTAR)
9448   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9449 
9450 if (min == 0)
9451   {
9452   /* Handles both invalid and empty cases. Since the minimum repeat,
9453   is zero the invalid case is basically the same as an empty case. */
9454   if (ref)
9455     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9456   else
9457     {
9458     compile_dnref_search(common, ccbegin, NULL);
9459     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9460     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9461     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9462     }
9463   /* Length is non-zero, we can match real repeats. */
9464   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9465   jump = JUMP(SLJIT_JUMP);
9466   }
9467 else
9468   {
9469   if (ref)
9470     {
9471     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9472     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9473     }
9474   else
9475     {
9476     compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
9477     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9478     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9479     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9480     }
9481   }
9482 
9483 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9484 if (max > 0)
9485   add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
9486 
9487 if (!ref)
9488   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9489 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
9490 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9491 
9492 if (min > 1)
9493   {
9494   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9495   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9496   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9497   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
9498   }
9499 else if (max > 0)
9500   OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
9501 
9502 if (jump != NULL)
9503   JUMPHERE(jump);
9504 JUMPHERE(zerolength);
9505 
9506 count_match(common);
9507 return cc;
9508 }
9509 
compile_recurse_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9510 static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9511 {
9512 DEFINE_COMPILER;
9513 backtrack_common *backtrack;
9514 recurse_entry *entry = common->entries;
9515 recurse_entry *prev = NULL;
9516 sljit_sw start = GET(cc, 1);
9517 PCRE2_SPTR start_cc;
9518 BOOL needs_control_head;
9519 
9520 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
9521 
9522 /* Inlining simple patterns. */
9523 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
9524   {
9525   start_cc = common->start + start;
9526   compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
9527   BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
9528   return cc + 1 + LINK_SIZE;
9529   }
9530 
9531 while (entry != NULL)
9532   {
9533   if (entry->start == start)
9534     break;
9535   prev = entry;
9536   entry = entry->next;
9537   }
9538 
9539 if (entry == NULL)
9540   {
9541   entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
9542   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9543     return NULL;
9544   entry->next = NULL;
9545   entry->entry_label = NULL;
9546   entry->backtrack_label = NULL;
9547   entry->entry_calls = NULL;
9548   entry->backtrack_calls = NULL;
9549   entry->start = start;
9550 
9551   if (prev != NULL)
9552     prev->next = entry;
9553   else
9554     common->entries = entry;
9555   }
9556 
9557 BACKTRACK_AS(recurse_backtrack)->entry = entry;
9558 
9559 if (entry->entry_label == NULL)
9560   add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));
9561 else
9562   JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
9563 /* Leave if the match is failed. */
9564 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
9565 BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
9566 return cc + 1 + LINK_SIZE;
9567 }
9568 
do_callout(struct jit_arguments * arguments,pcre2_callout_block * callout_block,PCRE2_SPTR * jit_ovector)9569 static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
9570 {
9571 PCRE2_SPTR begin;
9572 PCRE2_SIZE *ovector;
9573 sljit_u32 oveccount, capture_top;
9574 
9575 if (arguments->callout == NULL)
9576   return 0;
9577 
9578 SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);
9579 
9580 begin = arguments->begin;
9581 ovector = (PCRE2_SIZE*)(callout_block + 1);
9582 oveccount = callout_block->capture_top;
9583 
9584 SLJIT_ASSERT(oveccount >= 1);
9585 
9586 callout_block->version = 2;
9587 callout_block->callout_flags = 0;
9588 
9589 /* Offsets in subject. */
9590 callout_block->subject_length = arguments->end - arguments->begin;
9591 callout_block->start_match = jit_ovector[0] - begin;
9592 callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
9593 callout_block->subject = begin;
9594 
9595 /* Convert and copy the JIT offset vector to the ovector array. */
9596 callout_block->capture_top = 1;
9597 callout_block->offset_vector = ovector;
9598 
9599 ovector[0] = PCRE2_UNSET;
9600 ovector[1] = PCRE2_UNSET;
9601 ovector += 2;
9602 jit_ovector += 2;
9603 capture_top = 1;
9604 
9605 /* Convert pointers to sizes. */
9606 while (--oveccount != 0)
9607   {
9608   capture_top++;
9609 
9610   ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin);
9611   ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin);
9612 
9613   if (ovector[0] != PCRE2_UNSET)
9614     callout_block->capture_top = capture_top;
9615 
9616   ovector += 2;
9617   jit_ovector += 2;
9618   }
9619 
9620 return (arguments->callout)(callout_block, arguments->callout_data);
9621 }
9622 
/* Byte offset of the field `arg` inside a pcre2_callout_block. */
#define CALLOUT_ARG_OFFSET(arg) SLJIT_OFFSETOF(pcre2_callout_block, arg)
9625 
compile_callout_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9626 static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9627 {
9628 DEFINE_COMPILER;
9629 backtrack_common *backtrack;
9630 sljit_s32 mov_opcode;
9631 unsigned int callout_length = (*cc == OP_CALLOUT)
9632     ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
9633 sljit_sw value1;
9634 sljit_sw value2;
9635 sljit_sw value3;
9636 sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw);
9637 
9638 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9639 
9640 callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);
9641 
9642 allocate_stack(common, callout_arg_size);
9643 
9644 SLJIT_ASSERT(common->capture_last_ptr != 0);
9645 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
9646 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9647 value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
9648 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
9649 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
9650 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
9651 
9652 /* These pointer sized fields temporarly stores internal variables. */
9653 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
9654 
9655 if (common->mark_ptr != 0)
9656   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
9657 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
9658 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
9659 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
9660 
9661 if (*cc == OP_CALLOUT)
9662   {
9663   value1 = 0;
9664   value2 = 0;
9665   value3 = 0;
9666   }
9667 else
9668   {
9669   value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
9670   value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
9671   value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
9672   }
9673 
9674 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
9675 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
9676 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
9677 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
9678 
9679 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
9680 
9681 /* Needed to save important temporary registers. */
9682 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
9683 /* SLJIT_R0 = arguments */
9684 OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
9685 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
9686 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
9687 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
9688 free_stack(common, callout_arg_size);
9689 
9690 /* Check return value. */
9691 OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
9692 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER32));
9693 if (common->abort_label == NULL)
9694   add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL32) /* SIG_LESS */);
9695 else
9696   JUMPTO(SLJIT_NOT_EQUAL32 /* SIG_LESS */, common->abort_label);
9697 return cc + callout_length;
9698 }
9699 
9700 #undef CALLOUT_ARG_SIZE
9701 #undef CALLOUT_ARG_OFFSET
9702 
assert_needs_str_ptr_saving(PCRE2_SPTR cc)9703 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
9704 {
9705 while (TRUE)
9706   {
9707   switch (*cc)
9708     {
9709     case OP_CALLOUT_STR:
9710     cc += GET(cc, 1 + 2*LINK_SIZE);
9711     break;
9712 
9713     case OP_NOT_WORD_BOUNDARY:
9714     case OP_WORD_BOUNDARY:
9715     case OP_CIRC:
9716     case OP_CIRCM:
9717     case OP_DOLL:
9718     case OP_DOLLM:
9719     case OP_CALLOUT:
9720     case OP_ALT:
9721     cc += PRIV(OP_lengths)[*cc];
9722     break;
9723 
9724     case OP_KET:
9725     return FALSE;
9726 
9727     default:
9728     return TRUE;
9729     }
9730   }
9731 }
9732 
compile_assert_matchingpath(compiler_common * common,PCRE2_SPTR cc,assert_backtrack * backtrack,BOOL conditional)9733 static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
9734 {
9735 DEFINE_COMPILER;
9736 int framesize;
9737 int extrasize;
9738 BOOL local_quit_available = FALSE;
9739 BOOL needs_control_head;
9740 int private_data_ptr;
9741 backtrack_common altbacktrack;
9742 PCRE2_SPTR ccbegin;
9743 PCRE2_UCHAR opcode;
9744 PCRE2_UCHAR bra = OP_BRA;
9745 jump_list *tmp = NULL;
9746 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
9747 jump_list **found;
9748 /* Saving previous accept variables. */
9749 BOOL save_local_quit_available = common->local_quit_available;
9750 BOOL save_in_positive_assertion = common->in_positive_assertion;
9751 then_trap_backtrack *save_then_trap = common->then_trap;
9752 struct sljit_label *save_quit_label = common->quit_label;
9753 struct sljit_label *save_accept_label = common->accept_label;
9754 jump_list *save_quit = common->quit;
9755 jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
9756 jump_list *save_accept = common->accept;
9757 struct sljit_jump *jump;
9758 struct sljit_jump *brajump = NULL;
9759 
9760 /* Assert captures then. */
9761 common->then_trap = NULL;
9762 
9763 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
9764   {
9765   SLJIT_ASSERT(!conditional);
9766   bra = *cc;
9767   cc++;
9768   }
9769 private_data_ptr = PRIVATE_DATA(cc);
9770 SLJIT_ASSERT(private_data_ptr != 0);
9771 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
9772 backtrack->framesize = framesize;
9773 backtrack->private_data_ptr = private_data_ptr;
9774 opcode = *cc;
9775 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
9776 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
9777 ccbegin = cc;
9778 cc += GET(cc, 1);
9779 
9780 if (bra == OP_BRAMINZERO)
9781   {
9782   /* This is a braminzero backtrack path. */
9783   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9784   free_stack(common, 1);
9785   brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9786   }
9787 
9788 if (framesize < 0)
9789   {
9790   extrasize = 1;
9791   if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
9792     extrasize = 0;
9793 
9794   if (needs_control_head)
9795     extrasize++;
9796 
9797   if (framesize == no_frame)
9798     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
9799 
9800   if (extrasize > 0)
9801     allocate_stack(common, extrasize);
9802 
9803   if (needs_control_head)
9804     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9805 
9806   if (extrasize > 0)
9807     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9808 
9809   if (needs_control_head)
9810     {
9811     SLJIT_ASSERT(extrasize == 2);
9812     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9813     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9814     }
9815   }
9816 else
9817   {
9818   extrasize = needs_control_head ? 3 : 2;
9819   allocate_stack(common, framesize + extrasize);
9820 
9821   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9822   OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
9823   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
9824   if (needs_control_head)
9825     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9826   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9827 
9828   if (needs_control_head)
9829     {
9830     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
9831     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
9832     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9833     }
9834   else
9835     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9836 
9837   init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
9838   }
9839 
9840 memset(&altbacktrack, 0, sizeof(backtrack_common));
9841 if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
9842   {
9843   /* Control verbs cannot escape from these asserts. */
9844   local_quit_available = TRUE;
9845   common->local_quit_available = TRUE;
9846   common->quit_label = NULL;
9847   common->quit = NULL;
9848   }
9849 
9850 common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
9851 common->positive_assertion_quit = NULL;
9852 
9853 while (1)
9854   {
9855   common->accept_label = NULL;
9856   common->accept = NULL;
9857   altbacktrack.top = NULL;
9858   altbacktrack.topbacktracks = NULL;
9859 
9860   if (*ccbegin == OP_ALT && extrasize > 0)
9861     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9862 
9863   altbacktrack.cc = ccbegin;
9864   compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
9865   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9866     {
9867     if (local_quit_available)
9868       {
9869       common->local_quit_available = save_local_quit_available;
9870       common->quit_label = save_quit_label;
9871       common->quit = save_quit;
9872       }
9873     common->in_positive_assertion = save_in_positive_assertion;
9874     common->then_trap = save_then_trap;
9875     common->accept_label = save_accept_label;
9876     common->positive_assertion_quit = save_positive_assertion_quit;
9877     common->accept = save_accept;
9878     return NULL;
9879     }
9880   common->accept_label = LABEL();
9881   if (common->accept != NULL)
9882     set_jumps(common->accept, common->accept_label);
9883 
9884   /* Reset stack. */
9885   if (framesize < 0)
9886     {
9887     if (framesize == no_frame)
9888       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9889     else if (extrasize > 0)
9890       free_stack(common, extrasize);
9891 
9892     if (needs_control_head)
9893       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
9894     }
9895   else
9896     {
9897     if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
9898       {
9899       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9900       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
9901       if (needs_control_head)
9902         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
9903       }
9904     else
9905       {
9906       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9907       if (needs_control_head)
9908         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
9909       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9910       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
9911       }
9912     }
9913 
9914   if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
9915     {
9916     /* We know that STR_PTR was stored on the top of the stack. */
9917     if (conditional)
9918       {
9919       if (extrasize > 0)
9920         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
9921       }
9922     else if (bra == OP_BRAZERO)
9923       {
9924       if (framesize < 0)
9925         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
9926       else
9927         {
9928         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
9929         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
9930         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9931         }
9932       OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9933       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9934       }
9935     else if (framesize >= 0)
9936       {
9937       /* For OP_BRA and OP_BRAMINZERO. */
9938       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
9939       }
9940     }
9941   add_jump(compiler, found, JUMP(SLJIT_JUMP));
9942 
9943   compile_backtrackingpath(common, altbacktrack.top);
9944   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9945     {
9946     if (local_quit_available)
9947       {
9948       common->local_quit_available = save_local_quit_available;
9949       common->quit_label = save_quit_label;
9950       common->quit = save_quit;
9951       }
9952     common->in_positive_assertion = save_in_positive_assertion;
9953     common->then_trap = save_then_trap;
9954     common->accept_label = save_accept_label;
9955     common->positive_assertion_quit = save_positive_assertion_quit;
9956     common->accept = save_accept;
9957     return NULL;
9958     }
9959   set_jumps(altbacktrack.topbacktracks, LABEL());
9960 
9961   if (*cc != OP_ALT)
9962     break;
9963 
9964   ccbegin = cc;
9965   cc += GET(cc, 1);
9966   }
9967 
9968 if (local_quit_available)
9969   {
9970   SLJIT_ASSERT(common->positive_assertion_quit == NULL);
9971   /* Makes the check less complicated below. */
9972   common->positive_assertion_quit = common->quit;
9973   }
9974 
9975 /* None of them matched. */
9976 if (common->positive_assertion_quit != NULL)
9977   {
9978   jump = JUMP(SLJIT_JUMP);
9979   set_jumps(common->positive_assertion_quit, LABEL());
9980   SLJIT_ASSERT(framesize != no_stack);
9981   if (framesize < 0)
9982     OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
9983   else
9984     {
9985     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9986     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9987     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
9988     }
9989   JUMPHERE(jump);
9990   }
9991 
9992 if (needs_control_head)
9993   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
9994 
9995 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
9996   {
9997   /* Assert is failed. */
9998   if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
9999     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10000 
10001   if (framesize < 0)
10002     {
10003     /* The topmost item should be 0. */
10004     if (bra == OP_BRAZERO)
10005       {
10006       if (extrasize == 2)
10007         free_stack(common, 1);
10008       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10009       }
10010     else if (extrasize > 0)
10011       free_stack(common, extrasize);
10012     }
10013   else
10014     {
10015     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
10016     /* The topmost item should be 0. */
10017     if (bra == OP_BRAZERO)
10018       {
10019       free_stack(common, framesize + extrasize - 1);
10020       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10021       }
10022     else
10023       free_stack(common, framesize + extrasize);
10024     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10025     }
10026   jump = JUMP(SLJIT_JUMP);
10027   if (bra != OP_BRAZERO)
10028     add_jump(compiler, target, jump);
10029 
10030   /* Assert is successful. */
10031   set_jumps(tmp, LABEL());
10032   if (framesize < 0)
10033     {
10034     /* We know that STR_PTR was stored on the top of the stack. */
10035     if (extrasize > 0)
10036       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
10037 
10038     /* Keep the STR_PTR on the top of the stack. */
10039     if (bra == OP_BRAZERO)
10040       {
10041       OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
10042       if (extrasize == 2)
10043         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10044       }
10045     else if (bra == OP_BRAMINZERO)
10046       {
10047       OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
10048       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10049       }
10050     }
10051   else
10052     {
10053     if (bra == OP_BRA)
10054       {
10055       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
10056       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
10057       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
10058       }
10059     else
10060       {
10061       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
10062       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
10063       if (extrasize == 2)
10064         {
10065         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10066         if (bra == OP_BRAMINZERO)
10067           OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10068         }
10069       else
10070         {
10071         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
10072         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
10073         }
10074       }
10075     }
10076 
10077   if (bra == OP_BRAZERO)
10078     {
10079     backtrack->matchingpath = LABEL();
10080     SET_LABEL(jump, backtrack->matchingpath);
10081     }
10082   else if (bra == OP_BRAMINZERO)
10083     {
10084     JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
10085     JUMPHERE(brajump);
10086     if (framesize >= 0)
10087       {
10088       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10089       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10090       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
10091       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
10092       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10093       }
10094     set_jumps(backtrack->common.topbacktracks, LABEL());
10095     }
10096   }
10097 else
10098   {
10099   /* AssertNot is successful. */
10100   if (framesize < 0)
10101     {
10102     if (extrasize > 0)
10103       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10104 
10105     if (bra != OP_BRA)
10106       {
10107       if (extrasize == 2)
10108         free_stack(common, 1);
10109       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10110       }
10111     else if (extrasize > 0)
10112       free_stack(common, extrasize);
10113     }
10114   else
10115     {
10116     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10117     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
10118     /* The topmost item should be 0. */
10119     if (bra != OP_BRA)
10120       {
10121       free_stack(common, framesize + extrasize - 1);
10122       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10123       }
10124     else
10125       free_stack(common, framesize + extrasize);
10126     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10127     }
10128 
10129   if (bra == OP_BRAZERO)
10130     backtrack->matchingpath = LABEL();
10131   else if (bra == OP_BRAMINZERO)
10132     {
10133     JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
10134     JUMPHERE(brajump);
10135     }
10136 
10137   if (bra != OP_BRA)
10138     {
10139     SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
10140     set_jumps(backtrack->common.topbacktracks, LABEL());
10141     backtrack->common.topbacktracks = NULL;
10142     }
10143   }
10144 
10145 if (local_quit_available)
10146   {
10147   common->local_quit_available = save_local_quit_available;
10148   common->quit_label = save_quit_label;
10149   common->quit = save_quit;
10150   }
10151 common->in_positive_assertion = save_in_positive_assertion;
10152 common->then_trap = save_then_trap;
10153 common->accept_label = save_accept_label;
10154 common->positive_assertion_quit = save_positive_assertion_quit;
10155 common->accept = save_accept;
10156 return cc + 1 + LINK_SIZE;
10157 }
10158 
match_once_common(compiler_common * common,PCRE2_UCHAR ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)10159 static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
10160 {
10161 DEFINE_COMPILER;
10162 int stacksize;
10163 
10164 if (framesize < 0)
10165   {
10166   if (framesize == no_frame)
10167     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10168   else
10169     {
10170     stacksize = needs_control_head ? 1 : 0;
10171     if (ket != OP_KET || has_alternatives)
10172       stacksize++;
10173 
10174     if (stacksize > 0)
10175       free_stack(common, stacksize);
10176     }
10177 
10178   if (needs_control_head)
10179     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
10180 
10181   /* TMP2 which is set here used by OP_KETRMAX below. */
10182   if (ket == OP_KETRMAX)
10183     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10184   else if (ket == OP_KETRMIN)
10185     {
10186     /* Move the STR_PTR to the private_data_ptr. */
10187     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10188     }
10189   }
10190 else
10191   {
10192   stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
10193   OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
10194   if (needs_control_head)
10195     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10196 
10197   if (ket == OP_KETRMAX)
10198     {
10199     /* TMP2 which is set here used by OP_KETRMAX below. */
10200     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10201     }
10202   }
10203 if (needs_control_head)
10204   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10205 }
10206 
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)10207 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
10208 {
10209 DEFINE_COMPILER;
10210 
10211 if (common->capture_last_ptr != 0)
10212   {
10213   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
10214   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10215   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10216   stacksize++;
10217   }
10218 if (common->optimized_cbracket[offset >> 1] == 0)
10219   {
10220   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
10221   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
10222   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10223   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10224   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10225   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10226   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10227   stacksize += 2;
10228   }
10229 return stacksize;
10230 }
10231 
do_script_run(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10232 static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10233 {
10234   if (PRIV(script_run)(ptr, endptr, FALSE))
10235     return endptr;
10236   return NULL;
10237 }
10238 
10239 #ifdef SUPPORT_UNICODE
10240 
do_script_run_utf(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10241 static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10242 {
10243   if (PRIV(script_run)(ptr, endptr, TRUE))
10244     return endptr;
10245   return NULL;
10246 }
10247 
10248 #endif /* SUPPORT_UNICODE */
10249 
match_script_run_common(compiler_common * common,int private_data_ptr,backtrack_common * parent)10250 static SLJIT_INLINE void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
10251 {
10252 DEFINE_COMPILER;
10253 
10254 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
10255 
10256 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10257 #ifdef SUPPORT_UNICODE
10258 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
10259   common->utf ? SLJIT_FUNC_OFFSET(do_script_run_utf) : SLJIT_FUNC_OFFSET(do_script_run));
10260 #else
10261 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_script_run));
10262 #endif
10263 
10264 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
10265 add_jump(compiler, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
10266 }
10267 
10268 /*
10269   Handling bracketed expressions is probably the most complex part.
10270 
10271   Stack layout naming characters:
10272     S - Push the current STR_PTR
10273     0 - Push a 0 (NULL)
10274     A - Push the current STR_PTR. Needed for restoring the STR_PTR
10275         before the next alternative. Not pushed if there are no alternatives.
10276     M - Any values pushed by the current alternative. Can be empty, or anything.
10277     C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
10278     L - Push the previous local (pointed by localptr) to the stack
10279    () - optional values stored on the stack
10280   ()* - optional, can be stored multiple times
10281 
10282   The following list shows the regular expression templates, their PCRE byte codes
10283   and stack layout supported by pcre-sljit.
10284 
10285   (?:)                     OP_BRA     | OP_KET                A M
10286   ()                       OP_CBRA    | OP_KET                C M
10287   (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
10288                            OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
10289   (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
10290                            OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
10291   ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
10292                            OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
10293   ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
10294                            OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
10295   (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
10296   (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
10297   ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
10298   ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
10299   (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
10300            OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
10301   (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
10302            OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
10303   ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
10304            OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
10305   ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
10306            OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*
10307 
10308 
10309   Stack layout naming characters:
10310     A - Push the alternative index (starting from 0) on the stack.
10311         Not pushed if there are no alternatives.
10312     M - Any values pushed by the current alternative. Can be empty, or anything.
10313 
10314   The next list shows the possible content of a bracket:
10315   (|)     OP_*BRA    | OP_ALT ...         M A
10316   (?()|)  OP_*COND   | OP_ALT             M A
10317   (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
10318                                           Or nothing, if trace is unnecessary
10319 */
10320 
compile_bracket_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)10321 static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10322 {
10323 DEFINE_COMPILER;
10324 backtrack_common *backtrack;
10325 PCRE2_UCHAR opcode;
10326 int private_data_ptr = 0;
10327 int offset = 0;
10328 int i, stacksize;
10329 int repeat_ptr = 0, repeat_length = 0;
10330 int repeat_type = 0, repeat_count = 0;
10331 PCRE2_SPTR ccbegin;
10332 PCRE2_SPTR matchingpath;
10333 PCRE2_SPTR slot;
10334 PCRE2_UCHAR bra = OP_BRA;
10335 PCRE2_UCHAR ket;
10336 assert_backtrack *assert;
10337 BOOL has_alternatives;
10338 BOOL needs_control_head = FALSE;
10339 struct sljit_jump *jump;
10340 struct sljit_jump *skip;
10341 struct sljit_label *rmax_label = NULL;
10342 struct sljit_jump *braminzero = NULL;
10343 
10344 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
10345 
10346 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
10347   {
10348   bra = *cc;
10349   cc++;
10350   opcode = *cc;
10351   }
10352 
10353 opcode = *cc;
10354 ccbegin = cc;
10355 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
10356 ket = *matchingpath;
10357 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
10358   {
10359   repeat_ptr = PRIVATE_DATA(matchingpath);
10360   repeat_length = PRIVATE_DATA(matchingpath + 1);
10361   repeat_type = PRIVATE_DATA(matchingpath + 2);
10362   repeat_count = PRIVATE_DATA(matchingpath + 3);
10363   SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
10364   if (repeat_type == OP_UPTO)
10365     ket = OP_KETRMAX;
10366   if (repeat_type == OP_MINUPTO)
10367     ket = OP_KETRMIN;
10368   }
10369 
10370 matchingpath = ccbegin + 1 + LINK_SIZE;
10371 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
10372 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
10373 cc += GET(cc, 1);
10374 
10375 has_alternatives = *cc == OP_ALT;
10376 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
10377   {
10378   SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
10379     compile_time_checks_must_be_grouped_together);
10380   has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
10381   }
10382 
10383 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
10384   opcode = OP_SCOND;
10385 
10386 if (opcode == OP_CBRA || opcode == OP_SCBRA)
10387   {
10388   /* Capturing brackets has a pre-allocated space. */
10389   offset = GET2(ccbegin, 1 + LINK_SIZE);
10390   if (common->optimized_cbracket[offset] == 0)
10391     {
10392     private_data_ptr = OVECTOR_PRIV(offset);
10393     offset <<= 1;
10394     }
10395   else
10396     {
10397     offset <<= 1;
10398     private_data_ptr = OVECTOR(offset);
10399     }
10400   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10401   matchingpath += IMM2_SIZE;
10402   }
10403 else if (opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10404   {
10405   /* Other brackets simply allocate the next entry. */
10406   private_data_ptr = PRIVATE_DATA(ccbegin);
10407   SLJIT_ASSERT(private_data_ptr != 0);
10408   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10409   if (opcode == OP_ONCE)
10410     BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
10411   }
10412 
10413 /* Instructions before the first alternative. */
10414 stacksize = 0;
10415 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10416   stacksize++;
10417 if (bra == OP_BRAZERO)
10418   stacksize++;
10419 
10420 if (stacksize > 0)
10421   allocate_stack(common, stacksize);
10422 
10423 stacksize = 0;
10424 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10425   {
10426   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10427   stacksize++;
10428   }
10429 
10430 if (bra == OP_BRAZERO)
10431   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10432 
10433 if (bra == OP_BRAMINZERO)
10434   {
10435   /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
10436   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10437   if (ket != OP_KETRMIN)
10438     {
10439     free_stack(common, 1);
10440     braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10441     }
10442   else if (opcode == OP_ONCE || opcode >= OP_SBRA)
10443     {
10444     jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10445     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10446     /* Nothing stored during the first run. */
10447     skip = JUMP(SLJIT_JUMP);
10448     JUMPHERE(jump);
10449     /* Checking zero-length iteration. */
10450     if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10451       {
10452       /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
10453       braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10454       }
10455     else
10456       {
10457       /* Except when the whole stack frame must be saved. */
10458       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10459       braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
10460       }
10461     JUMPHERE(skip);
10462     }
10463   else
10464     {
10465     jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10466     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10467     JUMPHERE(jump);
10468     }
10469   }
10470 
10471 if (repeat_type != 0)
10472   {
10473   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
10474   if (repeat_type == OP_EXACT)
10475     rmax_label = LABEL();
10476   }
10477 
10478 if (ket == OP_KETRMIN)
10479   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10480 
10481 if (ket == OP_KETRMAX)
10482   {
10483   rmax_label = LABEL();
10484   if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
10485     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
10486   }
10487 
10488 /* Handling capturing brackets and alternatives. */
10489 if (opcode == OP_ONCE)
10490   {
10491   stacksize = 0;
10492   if (needs_control_head)
10493     {
10494     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10495     stacksize++;
10496     }
10497 
10498   if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10499     {
10500     /* Neither capturing brackets nor recursions are found in the block. */
10501     if (ket == OP_KETRMIN)
10502       {
10503       stacksize += 2;
10504       if (!needs_control_head)
10505         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10506       }
10507     else
10508       {
10509       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10510         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
10511       if (ket == OP_KETRMAX || has_alternatives)
10512         stacksize++;
10513       }
10514 
10515     if (stacksize > 0)
10516       allocate_stack(common, stacksize);
10517 
10518     stacksize = 0;
10519     if (needs_control_head)
10520       {
10521       stacksize++;
10522       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10523       }
10524 
10525     if (ket == OP_KETRMIN)
10526       {
10527       if (needs_control_head)
10528         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10529       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10530       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10531         OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
10532       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10533       }
10534     else if (ket == OP_KETRMAX || has_alternatives)
10535       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10536     }
10537   else
10538     {
10539     if (ket != OP_KET || has_alternatives)
10540       stacksize++;
10541 
10542     stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
10543     allocate_stack(common, stacksize);
10544 
10545     if (needs_control_head)
10546       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10547 
10548     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10549     OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10550 
10551     stacksize = needs_control_head ? 1 : 0;
10552     if (ket != OP_KET || has_alternatives)
10553       {
10554       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10555       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10556       stacksize++;
10557       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10558       }
10559     else
10560       {
10561       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10562       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10563       }
10564     init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
10565     }
10566   }
10567 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
10568   {
10569   /* Saving the previous values. */
10570   if (common->optimized_cbracket[offset >> 1] != 0)
10571     {
10572     SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
10573     allocate_stack(common, 2);
10574     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10575     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
10576     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10577     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
10578     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
10579     }
10580   else
10581     {
10582     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10583     allocate_stack(common, 1);
10584     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10585     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10586     }
10587   }
10588 else if (opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10589   {
10590   /* Saving the previous value. */
10591   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10592   allocate_stack(common, 1);
10593   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10594   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10595   }
10596 else if (has_alternatives)
10597   {
10598   /* Pushing the starting string pointer. */
10599   allocate_stack(common, 1);
10600   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10601   }
10602 
10603 /* Generating code for the first alternative. */
10604 if (opcode == OP_COND || opcode == OP_SCOND)
10605   {
10606   if (*matchingpath == OP_CREF)
10607     {
10608     SLJIT_ASSERT(has_alternatives);
10609     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
10610       CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
10611     matchingpath += 1 + IMM2_SIZE;
10612     }
10613   else if (*matchingpath == OP_DNCREF)
10614     {
10615     SLJIT_ASSERT(has_alternatives);
10616 
10617     i = GET2(matchingpath, 1 + IMM2_SIZE);
10618     slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10619     OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
10620     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
10621     OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10622     slot += common->name_entry_size;
10623     i--;
10624     while (i-- > 0)
10625       {
10626       OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10627       OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
10628       slot += common->name_entry_size;
10629       }
10630     OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
10631     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
10632     matchingpath += 1 + 2 * IMM2_SIZE;
10633     }
10634   else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
10635     {
10636     /* Never has other case. */
10637     BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
10638     SLJIT_ASSERT(!has_alternatives);
10639 
10640     if (*matchingpath == OP_TRUE)
10641       {
10642       stacksize = 1;
10643       matchingpath++;
10644       }
10645     else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
10646       stacksize = 0;
10647     else if (*matchingpath == OP_RREF)
10648       {
10649       stacksize = GET2(matchingpath, 1);
10650       if (common->currententry == NULL)
10651         stacksize = 0;
10652       else if (stacksize == RREF_ANY)
10653         stacksize = 1;
10654       else if (common->currententry->start == 0)
10655         stacksize = stacksize == 0;
10656       else
10657         stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10658 
10659       if (stacksize != 0)
10660         matchingpath += 1 + IMM2_SIZE;
10661       }
10662     else
10663       {
10664       if (common->currententry == NULL || common->currententry->start == 0)
10665         stacksize = 0;
10666       else
10667         {
10668         stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
10669         slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10670         i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10671         while (stacksize > 0)
10672           {
10673           if ((int)GET2(slot, 0) == i)
10674             break;
10675           slot += common->name_entry_size;
10676           stacksize--;
10677           }
10678         }
10679 
10680       if (stacksize != 0)
10681         matchingpath += 1 + 2 * IMM2_SIZE;
10682       }
10683 
10684       /* The stacksize == 0 is a common "else" case. */
10685       if (stacksize == 0)
10686         {
10687         if (*cc == OP_ALT)
10688           {
10689           matchingpath = cc + 1 + LINK_SIZE;
10690           cc += GET(cc, 1);
10691           }
10692         else
10693           matchingpath = cc;
10694         }
10695     }
10696   else
10697     {
10698     SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
10699     /* Similar code as PUSH_BACKTRACK macro. */
10700     assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
10701     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10702       return NULL;
10703     memset(assert, 0, sizeof(assert_backtrack));
10704     assert->common.cc = matchingpath;
10705     BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
10706     matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
10707     }
10708   }
10709 
10710 compile_matchingpath(common, matchingpath, cc, backtrack);
10711 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10712   return NULL;
10713 
10714 if (opcode == OP_ONCE)
10715   match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
10716 
10717 if (opcode == OP_SCRIPT_RUN)
10718   match_script_run_common(common, private_data_ptr, backtrack);
10719 
10720 stacksize = 0;
10721 if (repeat_type == OP_MINUPTO)
10722   {
10723   /* We need to preserve the counter. TMP2 will be used below. */
10724   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10725   stacksize++;
10726   }
10727 if (ket != OP_KET || bra != OP_BRA)
10728   stacksize++;
10729 if (offset != 0)
10730   {
10731   if (common->capture_last_ptr != 0)
10732     stacksize++;
10733   if (common->optimized_cbracket[offset >> 1] == 0)
10734     stacksize += 2;
10735   }
10736 if (has_alternatives && opcode != OP_ONCE)
10737   stacksize++;
10738 
10739 if (stacksize > 0)
10740   allocate_stack(common, stacksize);
10741 
10742 stacksize = 0;
10743 if (repeat_type == OP_MINUPTO)
10744   {
10745   /* TMP2 was set above. */
10746   OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
10747   stacksize++;
10748   }
10749 
10750 if (ket != OP_KET || bra != OP_BRA)
10751   {
10752   if (ket != OP_KET)
10753     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10754   else
10755     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10756   stacksize++;
10757   }
10758 
10759 if (offset != 0)
10760   stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
10761 
10762 if (has_alternatives)
10763   {
10764   if (opcode != OP_ONCE)
10765     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10766   if (ket != OP_KETRMAX)
10767     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10768   }
10769 
10770 /* Must be after the matchingpath label. */
10771 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
10772   {
10773   SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
10774   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10775   }
10776 
10777 if (ket == OP_KETRMAX)
10778   {
10779   if (repeat_type != 0)
10780     {
10781     if (has_alternatives)
10782       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10783     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10784     JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10785     /* Drop STR_PTR for greedy plus quantifier. */
10786     if (opcode != OP_ONCE)
10787       free_stack(common, 1);
10788     }
10789   else if (opcode < OP_BRA || opcode >= OP_SBRA)
10790     {
10791     if (has_alternatives)
10792       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10793 
10794     /* Checking zero-length iteration. */
10795     if (opcode != OP_ONCE)
10796       {
10797       /* This case includes opcodes such as OP_SCRIPT_RUN. */
10798       CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
10799       /* Drop STR_PTR for greedy plus quantifier. */
10800       if (bra != OP_BRAZERO)
10801         free_stack(common, 1);
10802       }
10803     else
10804       /* TMP2 must contain the starting STR_PTR. */
10805       CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
10806     }
10807   else
10808     JUMPTO(SLJIT_JUMP, rmax_label);
10809   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10810   }
10811 
10812 if (repeat_type == OP_EXACT)
10813   {
10814   count_match(common);
10815   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10816   JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10817   }
10818 else if (repeat_type == OP_UPTO)
10819   {
10820   /* We need to preserve the counter. */
10821   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10822   allocate_stack(common, 1);
10823   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10824   }
10825 
10826 if (bra == OP_BRAZERO)
10827   BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
10828 
10829 if (bra == OP_BRAMINZERO)
10830   {
10831   /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
10832   JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
10833   if (braminzero != NULL)
10834     {
10835     JUMPHERE(braminzero);
10836     /* We need to release the end pointer to perform the
10837     backtrack for the zero-length iteration. When
10838     framesize is < 0, OP_ONCE will do the release itself. */
10839     if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
10840       {
10841       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10842       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10843       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
10844       }
10845     else if (ket == OP_KETRMIN && opcode != OP_ONCE)
10846       free_stack(common, 1);
10847     }
10848   /* Continue to the normal backtrack. */
10849   }
10850 
10851 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
10852   count_match(common);
10853 
10854 /* Skip the other alternatives. */
10855 while (*cc == OP_ALT)
10856   cc += GET(cc, 1);
10857 cc += 1 + LINK_SIZE;
10858 
10859 if (opcode == OP_ONCE)
10860   {
10861   /* We temporarily encode the needs_control_head in the lowest bit.
10862      Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
10863      the same value for small signed numbers (including negative numbers). */
10864   BACKTRACK_AS(bracket_backtrack)->u.framesize = (int)((unsigned)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
10865   }
10866 return cc + repeat_length;
10867 }
10868 
/* Emits the matching path for an OP_BRAPOS, OP_SBRAPOS, OP_CBRAPOS or
OP_SCBRAPOS group, optionally preceded by OP_BRAPOSZERO.

Arguments:
  common   the JIT compiler state
  cc       points to the group opcode (or to the leading OP_BRAPOSZERO)
  parent   the parent backtrack record (consumed by PUSH_BACKTRACK)

Returns the code pointer just past the closing OP_KETRPOS, or NULL when the
sljit compiler reports an error.

Every alternative is compiled together with its own backtracking path inside
this function; after a successful iteration the generated code jumps back to
the loop label. When OP_BRAPOSZERO is absent, an extra stack slot is
initialised to 1 and cleared after the first successful iteration; if it is
still non-zero once all alternatives have failed, a jump is added to
backtrack->topbacktracks. */
static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode;
int private_data_ptr;
int cbraprivptr = 0;
BOOL needs_control_head;
int framesize;
int stacksize;
int offset = 0;
BOOL zero = FALSE;
PCRE2_SPTR ccbegin = NULL;
int stack; /* Also contains the offset of control head. */
struct sljit_label *loop = NULL;
struct jump_list *emptymatch = NULL;

PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
if (*cc == OP_BRAPOSZERO)
  {
  zero = TRUE;
  cc++;
  }

opcode = *cc;
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
switch(opcode)
  {
  case OP_BRAPOS:
  case OP_SBRAPOS:
  ccbegin = cc + 1 + LINK_SIZE;
  break;

  case OP_CBRAPOS:
  case OP_SCBRAPOS:
  offset = GET2(cc, 1 + LINK_SIZE);
  /* This case cannot be optimized in the same way as
  normal capturing brackets. */
  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
  cbraprivptr = OVECTOR_PRIV(offset);
  /* From here on, offset is the index of the first ovector slot of the pair. */
  offset <<= 1;
  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
if (framesize < 0)
  {
  /* No frame: reserve slots for the saved ovector pair (plus capture_last
  when it is tracked) or for STR_PTR, then the optional control head and
  the optional "nothing matched yet" flag. */
  if (offset != 0)
    {
    stacksize = 2;
    if (common->capture_last_ptr != 0)
      stacksize++;
    }
  else
    stacksize = 1;

  if (needs_control_head)
    stacksize++;
  if (!zero)
    stacksize++;

  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  allocate_stack(common, stacksize);
  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  stack = 0;
  if (offset != 0)
    {
    stack = 2;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    if (common->capture_last_ptr != 0)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
      stack = 3;
      }
    }
  else
    {
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    stack = 1;
    }

  /* The flag is stored in the last slot (stacksize - 1); the control head,
  when present, goes into the slot just before it, and stack is left
  pointing at that control head slot. */
  if (needs_control_head)
    stack++;
  if (!zero)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
  if (needs_control_head)
    {
    stack--;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    }
  }
else
  {
  /* A frame is needed. Slots in order: optional flag, optional control
  head, STR_PTR (non-capturing groups only), the previous value of
  private_data_ptr, and then the frame written by init_frame. */
  stacksize = framesize + 1;
  if (!zero)
    stacksize++;
  if (needs_control_head)
    stacksize++;
  if (offset == 0)
    stacksize++;
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;

  allocate_stack(common, stacksize);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

  stack = 0;
  if (!zero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
    stack = 1;
    }
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    stack++;
    }
  if (offset == 0)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
    stack++;
    }
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
  /* Step back over the slots written after the control head, so that stack
  is the control head slot index whenever needs_control_head is set (the
  value is only read in that case). */
  stack -= 1 + (offset == 0);
  }

if (offset != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);

loop = LABEL();
while (*cc != OP_KETRPOS)
  {
  backtrack->top = NULL;
  backtrack->topbacktracks = NULL;
  /* Advance cc to the end of the current alternative. */
  cc += GET(cc, 1);

  compile_matchingpath(common, ccbegin, cc, backtrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

    if (offset != 0)
      {
      /* Record the capture: start comes from cbraprivptr, end is STR_PTR,
      and cbraprivptr is updated to STR_PTR for the next iteration. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      /* TMP1 keeps the previous position for the empty match test below. */
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
    }
  else
    {
    if (offset != 0)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* framesize >= 0 on this path, so the flag was stored in STACK(0). */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    }

  JUMPTO(SLJIT_JUMP, loop);
  flush_stubs(common);

  compile_backtrackingpath(common, backtrack->top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;
  set_jumps(backtrack->topbacktracks, LABEL());

  /* The alternative failed: reload STR_PTR from the saved position. */
  if (framesize < 0)
    {
    if (offset != 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
    else
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    }
  else
    {
    if (offset != 0)
      {
      /* Last alternative. */
      if (*cc == OP_KETRPOS)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      }
    }

  if (*cc == OP_KETRPOS)
    break;
  ccbegin = cc + 1 + LINK_SIZE;
  }

/* We don't have to restore the control head in case of a failed match. */

backtrack->topbacktracks = NULL;
if (!zero)
  {
  /* A flag that is still non-zero means no iteration has succeeded. */
  if (framesize < 0)
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
  else /* TMP2 is set to [private_data_ptr] above. */
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
  }

/* None of them matched. */
set_jumps(emptymatch, LABEL());
count_match(common);
return cc + 1 + LINK_SIZE;
}
11151 
get_iterator_parameters(compiler_common * common,PCRE2_SPTR cc,PCRE2_UCHAR * opcode,PCRE2_UCHAR * type,sljit_u32 * max,sljit_u32 * exact,PCRE2_SPTR * end)11152 static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
11153 {
11154 int class_len;
11155 
11156 *opcode = *cc;
11157 *exact = 0;
11158 
11159 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
11160   {
11161   cc++;
11162   *type = OP_CHAR;
11163   }
11164 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
11165   {
11166   cc++;
11167   *type = OP_CHARI;
11168   *opcode -= OP_STARI - OP_STAR;
11169   }
11170 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
11171   {
11172   cc++;
11173   *type = OP_NOT;
11174   *opcode -= OP_NOTSTAR - OP_STAR;
11175   }
11176 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
11177   {
11178   cc++;
11179   *type = OP_NOTI;
11180   *opcode -= OP_NOTSTARI - OP_STAR;
11181   }
11182 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
11183   {
11184   cc++;
11185   *opcode -= OP_TYPESTAR - OP_STAR;
11186   *type = OP_END;
11187   }
11188 else
11189   {
11190   SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
11191   *type = *opcode;
11192   cc++;
11193   class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
11194   *opcode = cc[class_len - 1];
11195 
11196   if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
11197     {
11198     *opcode -= OP_CRSTAR - OP_STAR;
11199     *end = cc + class_len;
11200 
11201     if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
11202       {
11203       *exact = 1;
11204       *opcode -= OP_PLUS - OP_STAR;
11205       }
11206     }
11207   else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
11208     {
11209     *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
11210     *end = cc + class_len;
11211 
11212     if (*opcode == OP_POSPLUS)
11213       {
11214       *exact = 1;
11215       *opcode = OP_POSSTAR;
11216       }
11217     }
11218   else
11219     {
11220     SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
11221     *max = GET2(cc, (class_len + IMM2_SIZE));
11222     *exact = GET2(cc, class_len);
11223 
11224     if (*max == 0)
11225       {
11226       if (*opcode == OP_CRPOSRANGE)
11227         *opcode = OP_POSSTAR;
11228       else
11229         *opcode -= OP_CRRANGE - OP_STAR;
11230       }
11231     else
11232       {
11233       *max -= *exact;
11234       if (*max == 0)
11235         *opcode = OP_EXACT;
11236       else if (*max == 1)
11237         {
11238         if (*opcode == OP_CRPOSRANGE)
11239           *opcode = OP_POSQUERY;
11240         else
11241           *opcode -= OP_CRRANGE - OP_QUERY;
11242         }
11243       else
11244         {
11245         if (*opcode == OP_CRPOSRANGE)
11246           *opcode = OP_POSUPTO;
11247         else
11248           *opcode -= OP_CRRANGE - OP_UPTO;
11249         }
11250       }
11251     *end = cc + class_len + 2 * IMM2_SIZE;
11252     }
11253   return cc;
11254   }
11255 
11256 switch(*opcode)
11257   {
11258   case OP_EXACT:
11259   *exact = GET2(cc, 0);
11260   cc += IMM2_SIZE;
11261   break;
11262 
11263   case OP_PLUS:
11264   case OP_MINPLUS:
11265   *exact = 1;
11266   *opcode -= OP_PLUS - OP_STAR;
11267   break;
11268 
11269   case OP_POSPLUS:
11270   *exact = 1;
11271   *opcode = OP_POSSTAR;
11272   break;
11273 
11274   case OP_UPTO:
11275   case OP_MINUPTO:
11276   case OP_POSUPTO:
11277   *max = GET2(cc, 0);
11278   cc += IMM2_SIZE;
11279   break;
11280   }
11281 
11282 if (*type == OP_END)
11283   {
11284   *type = *cc;
11285   *end = next_opcode(common, cc);
11286   cc++;
11287   return cc;
11288   }
11289 
11290 *end = cc + 1;
11291 #ifdef SUPPORT_UNICODE
11292 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
11293 #endif
11294 return cc;
11295 }
11296 
compile_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11297 static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11298 {
11299 DEFINE_COMPILER;
11300 backtrack_common *backtrack;
11301 PCRE2_UCHAR opcode;
11302 PCRE2_UCHAR type;
11303 sljit_u32 max = 0, exact;
11304 BOOL fast_fail;
11305 sljit_s32 fast_str_ptr;
11306 BOOL charpos_enabled;
11307 PCRE2_UCHAR charpos_char;
11308 unsigned int charpos_othercasebit;
11309 PCRE2_SPTR end;
11310 jump_list *no_match = NULL;
11311 jump_list *no_char1_match = NULL;
11312 struct sljit_jump *jump = NULL;
11313 struct sljit_label *label;
11314 int private_data_ptr = PRIVATE_DATA(cc);
11315 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
11316 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
11317 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
11318 int tmp_base, tmp_offset;
11319 
11320 PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
11321 
11322 fast_str_ptr = PRIVATE_DATA(cc + 1);
11323 fast_fail = TRUE;
11324 
11325 SLJIT_ASSERT(common->fast_forward_bc_ptr == NULL || fast_str_ptr == 0 || cc == common->fast_forward_bc_ptr);
11326 
11327 if (cc == common->fast_forward_bc_ptr)
11328   fast_fail = FALSE;
11329 else if (common->fast_fail_start_ptr == 0)
11330   fast_str_ptr = 0;
11331 
11332 SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || fast_str_ptr == 0
11333   || (fast_str_ptr >= common->fast_fail_start_ptr && fast_str_ptr <= common->fast_fail_end_ptr));
11334 
11335 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
11336 
11337 if (type != OP_EXTUNI)
11338   {
11339   tmp_base = TMP3;
11340   tmp_offset = 0;
11341   }
11342 else
11343   {
11344   tmp_base = SLJIT_MEM1(SLJIT_SP);
11345   tmp_offset = POSSESSIVE0;
11346   }
11347 
11348 if (fast_fail && fast_str_ptr != 0)
11349   add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr));
11350 
11351 /* Handle fixed part first. */
11352 if (exact > 1)
11353   {
11354   SLJIT_ASSERT(fast_str_ptr == 0);
11355   if (common->mode == PCRE2_JIT_COMPLETE
11356 #ifdef SUPPORT_UNICODE
11357       && !common->utf
11358 #endif
11359       && type != OP_ANYNL && type != OP_EXTUNI)
11360     {
11361     OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
11362     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
11363     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11364     label = LABEL();
11365     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11366     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11367     JUMPTO(SLJIT_NOT_ZERO, label);
11368     }
11369   else
11370     {
11371     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11372     label = LABEL();
11373     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11374     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11375     JUMPTO(SLJIT_NOT_ZERO, label);
11376     }
11377   }
11378 else if (exact == 1)
11379   compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11380 
11381 switch(opcode)
11382   {
11383   case OP_STAR:
11384   case OP_UPTO:
11385   SLJIT_ASSERT(fast_str_ptr == 0 || opcode == OP_STAR);
11386 
11387   if (type == OP_ANYNL || type == OP_EXTUNI)
11388     {
11389     SLJIT_ASSERT(private_data_ptr == 0);
11390     SLJIT_ASSERT(fast_str_ptr == 0);
11391 
11392     allocate_stack(common, 2);
11393     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11394     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
11395 
11396     if (opcode == OP_UPTO)
11397       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
11398 
11399     label = LABEL();
11400     compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11401     if (opcode == OP_UPTO)
11402       {
11403       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
11404       OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11405       jump = JUMP(SLJIT_ZERO);
11406       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
11407       }
11408 
11409     /* We cannot use TMP3 because of this allocate_stack. */
11410     allocate_stack(common, 1);
11411     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11412     JUMPTO(SLJIT_JUMP, label);
11413     if (jump != NULL)
11414       JUMPHERE(jump);
11415     }
11416   else
11417     {
11418     charpos_enabled = FALSE;
11419     charpos_char = 0;
11420     charpos_othercasebit = 0;
11421 
11422     if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
11423       {
11424       charpos_enabled = TRUE;
11425 #ifdef SUPPORT_UNICODE
11426       charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
11427 #endif
11428       if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
11429         {
11430         charpos_othercasebit = char_get_othercase_bit(common, end + 1);
11431         if (charpos_othercasebit == 0)
11432           charpos_enabled = FALSE;
11433         }
11434 
11435       if (charpos_enabled)
11436         {
11437         charpos_char = end[1];
11438         /* Consume the OP_CHAR opcode. */
11439         end += 2;
11440 #if PCRE2_CODE_UNIT_WIDTH == 8
11441         SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
11442 #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
11443         SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
11444         if ((charpos_othercasebit & 0x100) != 0)
11445           charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
11446 #endif
11447         if (charpos_othercasebit != 0)
11448           charpos_char |= charpos_othercasebit;
11449 
11450         BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
11451         BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
11452         BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
11453         }
11454       }
11455 
11456     if (charpos_enabled)
11457       {
11458       if (opcode == OP_UPTO)
11459         OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
11460 
11461       /* Search the first instance of charpos_char. */
11462       jump = JUMP(SLJIT_JUMP);
11463       label = LABEL();
11464       if (opcode == OP_UPTO)
11465         {
11466         OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11467         add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
11468         }
11469       compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11470       if (fast_str_ptr != 0)
11471         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
11472       JUMPHERE(jump);
11473 
11474       detect_partial_match(common, &backtrack->topbacktracks);
11475       OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11476       if (charpos_othercasebit != 0)
11477         OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11478       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11479 
11480       if (private_data_ptr == 0)
11481         allocate_stack(common, 2);
11482       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11483       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11484       if (opcode == OP_UPTO)
11485         {
11486         OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11487         add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11488         }
11489 
11490       /* Search the last instance of charpos_char. */
11491       label = LABEL();
11492       compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11493       if (fast_str_ptr != 0)
11494         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
11495       detect_partial_match(common, &no_match);
11496       OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11497       if (charpos_othercasebit != 0)
11498         OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11499       if (opcode == OP_STAR)
11500         {
11501         CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11502         OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11503         }
11504       else
11505         {
11506         jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
11507         OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11508         JUMPHERE(jump);
11509         }
11510 
11511       if (opcode == OP_UPTO)
11512         {
11513         OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11514         JUMPTO(SLJIT_NOT_ZERO, label);
11515         }
11516       else
11517         JUMPTO(SLJIT_JUMP, label);
11518 
11519       set_jumps(no_match, LABEL());
11520       OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11521       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11522       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11523       }
11524 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11525     else if (common->utf)
11526       {
11527       if (private_data_ptr == 0)
11528         allocate_stack(common, 2);
11529 
11530       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11531       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11532 
11533       if (opcode == OP_UPTO)
11534         OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11535 
11536       label = LABEL();
11537       compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11538       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11539 
11540       if (opcode == OP_UPTO)
11541         {
11542         OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11543         JUMPTO(SLJIT_NOT_ZERO, label);
11544         }
11545       else
11546         JUMPTO(SLJIT_JUMP, label);
11547 
11548       set_jumps(no_match, LABEL());
11549       OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11550       if (fast_str_ptr != 0)
11551         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
11552       }
11553 #endif
11554     else
11555       {
11556       if (private_data_ptr == 0)
11557         allocate_stack(common, 2);
11558 
11559       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11560       if (opcode == OP_UPTO)
11561         OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11562 
11563       label = LABEL();
11564       detect_partial_match(common, &no_match);
11565       compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11566       if (opcode == OP_UPTO)
11567         {
11568         OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11569         JUMPTO(SLJIT_NOT_ZERO, label);
11570         OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11571         }
11572       else
11573         JUMPTO(SLJIT_JUMP, label);
11574 
11575       set_jumps(no_char1_match, LABEL());
11576       OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11577       set_jumps(no_match, LABEL());
11578       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11579       if (fast_str_ptr != 0)
11580         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
11581       }
11582     }
11583   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11584   break;
11585 
11586   case OP_MINSTAR:
11587   if (private_data_ptr == 0)
11588     allocate_stack(common, 1);
11589   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11590   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11591   if (fast_str_ptr != 0)
11592     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
11593   break;
11594 
11595   case OP_MINUPTO:
11596   SLJIT_ASSERT(fast_str_ptr == 0);
11597   if (private_data_ptr == 0)
11598     allocate_stack(common, 2);
11599   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11600   OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
11601   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11602   break;
11603 
11604   case OP_QUERY:
11605   case OP_MINQUERY:
11606   SLJIT_ASSERT(fast_str_ptr == 0);
11607   if (private_data_ptr == 0)
11608     allocate_stack(common, 1);
11609   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11610   if (opcode == OP_QUERY)
11611     compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11612   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11613   break;
11614 
11615   case OP_EXACT:
11616   break;
11617 
11618   case OP_POSSTAR:
11619 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11620   if (common->utf)
11621     {
11622     OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11623     label = LABEL();
11624     compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11625     OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11626     JUMPTO(SLJIT_JUMP, label);
11627     set_jumps(no_match, LABEL());
11628     OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11629     if (fast_str_ptr != 0)
11630       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
11631     break;
11632     }
11633 #endif
11634   label = LABEL();
11635   detect_partial_match(common, &no_match);
11636   compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11637   JUMPTO(SLJIT_JUMP, label);
11638   set_jumps(no_char1_match, LABEL());
11639   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11640   set_jumps(no_match, LABEL());
11641   if (fast_str_ptr != 0)
11642     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
11643   break;
11644 
11645   case OP_POSUPTO:
11646   SLJIT_ASSERT(fast_str_ptr == 0);
11647 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11648   if (common->utf)
11649     {
11650     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11651     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11652     label = LABEL();
11653     compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11654     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11655     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11656     JUMPTO(SLJIT_NOT_ZERO, label);
11657     set_jumps(no_match, LABEL());
11658     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
11659     break;
11660     }
11661 #endif
11662   OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11663   label = LABEL();
11664   detect_partial_match(common, &no_match);
11665   compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11666   OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11667   JUMPTO(SLJIT_NOT_ZERO, label);
11668   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11669   set_jumps(no_char1_match, LABEL());
11670   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11671   set_jumps(no_match, LABEL());
11672   break;
11673 
11674   case OP_POSQUERY:
11675   SLJIT_ASSERT(fast_str_ptr == 0);
11676   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11677   compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11678   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11679   set_jumps(no_match, LABEL());
11680   OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11681   break;
11682 
11683   default:
11684   SLJIT_UNREACHABLE();
11685   break;
11686   }
11687 
11688 count_match(common);
11689 return end;
11690 }
11691 
compile_fail_accept_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11692 static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11693 {
11694 DEFINE_COMPILER;
11695 backtrack_common *backtrack;
11696 
11697 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11698 
11699 if (*cc == OP_FAIL)
11700   {
11701   add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11702   return cc + 1;
11703   }
11704 
11705 if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
11706   add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
11707 
11708 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
11709   {
11710   /* No need to check notempty conditions. */
11711   if (common->accept_label == NULL)
11712     add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
11713   else
11714     JUMPTO(SLJIT_JUMP, common->accept_label);
11715   return cc + 1;
11716   }
11717 
11718 if (common->accept_label == NULL)
11719   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
11720 else
11721   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
11722 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11723 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
11724 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
11725 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO));
11726 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
11727 if (common->accept_label == NULL)
11728   add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
11729 else
11730   JUMPTO(SLJIT_ZERO, common->accept_label);
11731 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
11732 if (common->accept_label == NULL)
11733   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
11734 else
11735   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
11736 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11737 return cc + 1;
11738 }
11739 
compile_close_matchingpath(compiler_common * common,PCRE2_SPTR cc)11740 static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
11741 {
11742 DEFINE_COMPILER;
11743 int offset = GET2(cc, 1);
11744 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
11745 
11746 /* Data will be discarded anyway... */
11747 if (common->currententry != NULL)
11748   return cc + 1 + IMM2_SIZE;
11749 
11750 if (!optimized_cbracket)
11751   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
11752 offset <<= 1;
11753 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11754 if (!optimized_cbracket)
11755   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11756 return cc + 1 + IMM2_SIZE;
11757 }
11758 
compile_control_verb_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11759 static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11760 {
11761 DEFINE_COMPILER;
11762 backtrack_common *backtrack;
11763 PCRE2_UCHAR opcode = *cc;
11764 PCRE2_SPTR ccend = cc + 1;
11765 
11766 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
11767     opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
11768   ccend += 2 + cc[1];
11769 
11770 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11771 
11772 if (opcode == OP_SKIP)
11773   {
11774   allocate_stack(common, 1);
11775   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11776   return ccend;
11777   }
11778 
11779 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
11780   {
11781   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11782   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
11783   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
11784   OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
11785   }
11786 
11787 return ccend;
11788 }
11789 
/* One-element opcode buffer holding OP_THEN_TRAP. The then_trap_backtrack
records set up in compile_then_trap_matchingpath() and compile_matchingpath()
point their common.cc field at this array. */
11790 static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
11791 
compile_then_trap_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)11792 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
11793 {
11794 DEFINE_COMPILER;
11795 backtrack_common *backtrack;
11796 BOOL needs_control_head;
11797 int size;
11798 
11799 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
11800 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
11801 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
11802 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
11803 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
11804 
11805 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
11806 size = 3 + (size < 0 ? 0 : size);
11807 
11808 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11809 allocate_stack(common, size);
11810 if (size > 3)
11811   OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
11812 else
11813   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
11814 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
11815 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
11816 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
11817 
11818 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
11819 if (size >= 0)
11820   init_frame(common, cc, ccend, size - 1, 0);
11821 }
11822 
/* Generates the matching path for the opcodes in the range [cc, ccend).

Each opcode is dispatched to the generator for its kind, and that generator
returns the position of the next opcode to process. A NULL result from a
generator stops the walk at once (presumably because a compile error has been
recorded - confirm against the individual generators).

Arguments:
  common   compiler state
  cc       first opcode to compile
  ccend    opcode that terminates the sequence; asserted to be OP_END or to
           lie in the OP_ALT..OP_KETRPOS range
  parent   backtrack record under which new backtrack records are pushed
*/
compile_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)11823 static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
11824 {
11825 DEFINE_COMPILER;
11826 backtrack_common *backtrack;
11827 BOOL has_then_trap = FALSE;
11828 then_trap_backtrack *save_then_trap = NULL;
11829 
11830 SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
11831 
/* When the pattern uses THEN and the then_offsets entry for this position is
non-zero, a then-trap is opened; the enclosing trap is remembered so that it
can be restored after the loop. */
11832 if (common->has_then && common->then_offsets[cc - common->start] != 0)
11833   {
11834   SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
11835   has_then_trap = TRUE;
11836   save_then_trap = common->then_trap;
11837   /* Tail item on backtrack. */
11838   compile_then_trap_matchingpath(common, cc, ccend, parent);
11839   }
11840 
11841 while (cc < ccend)
11842   {
11843   switch(*cc)
11844     {
    /* Simple assertions. Failure jumps are collected in the nextbacktracks
    list of parent->top when parent->top is set, otherwise in the
    topbacktracks list of parent itself. The same choice is made by the
    other single-item cases below. */
11845     case OP_SOD:
11846     case OP_SOM:
11847     case OP_NOT_WORD_BOUNDARY:
11848     case OP_WORD_BOUNDARY:
11849     case OP_EODN:
11850     case OP_EOD:
11851     case OP_DOLL:
11852     case OP_DOLLM:
11853     case OP_CIRC:
11854     case OP_CIRCM:
11855     case OP_REVERSE:
11856     cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
11857     break;
11858 
    /* Unrepeated single-item opcodes. */
11859     case OP_NOT_DIGIT:
11860     case OP_DIGIT:
11861     case OP_NOT_WHITESPACE:
11862     case OP_WHITESPACE:
11863     case OP_NOT_WORDCHAR:
11864     case OP_WORDCHAR:
11865     case OP_ANY:
11866     case OP_ALLANY:
11867     case OP_ANYBYTE:
11868     case OP_NOTPROP:
11869     case OP_PROP:
11870     case OP_ANYNL:
11871     case OP_NOT_HSPACE:
11872     case OP_HSPACE:
11873     case OP_NOT_VSPACE:
11874     case OP_VSPACE:
11875     case OP_EXTUNI:
11876     case OP_NOT:
11877     case OP_NOTI:
11878     cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
11879     break;
11880 
    /* The current OVECTOR(0) is saved in a new stack slot and then replaced
    by the current string position. */
11881     case OP_SET_SOM:
11882     PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
11883     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
11884     allocate_stack(common, 1);
11885     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
11886     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11887     cc++;
11888     break;
11889 
    /* In PCRE2_JIT_COMPLETE mode compile_charn_matchingpath() is used; in
    the other modes the single-item generator is used. */
11890     case OP_CHAR:
11891     case OP_CHARI:
11892     if (common->mode == PCRE2_JIT_COMPLETE)
11893       cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
11894     else
11895       cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
11896     break;
11897 
    /* Repeated single items are handled by the iterator generator. */
11898     case OP_STAR:
11899     case OP_MINSTAR:
11900     case OP_PLUS:
11901     case OP_MINPLUS:
11902     case OP_QUERY:
11903     case OP_MINQUERY:
11904     case OP_UPTO:
11905     case OP_MINUPTO:
11906     case OP_EXACT:
11907     case OP_POSSTAR:
11908     case OP_POSPLUS:
11909     case OP_POSQUERY:
11910     case OP_POSUPTO:
11911     case OP_STARI:
11912     case OP_MINSTARI:
11913     case OP_PLUSI:
11914     case OP_MINPLUSI:
11915     case OP_QUERYI:
11916     case OP_MINQUERYI:
11917     case OP_UPTOI:
11918     case OP_MINUPTOI:
11919     case OP_EXACTI:
11920     case OP_POSSTARI:
11921     case OP_POSPLUSI:
11922     case OP_POSQUERYI:
11923     case OP_POSUPTOI:
11924     case OP_NOTSTAR:
11925     case OP_NOTMINSTAR:
11926     case OP_NOTPLUS:
11927     case OP_NOTMINPLUS:
11928     case OP_NOTQUERY:
11929     case OP_NOTMINQUERY:
11930     case OP_NOTUPTO:
11931     case OP_NOTMINUPTO:
11932     case OP_NOTEXACT:
11933     case OP_NOTPOSSTAR:
11934     case OP_NOTPOSPLUS:
11935     case OP_NOTPOSQUERY:
11936     case OP_NOTPOSUPTO:
11937     case OP_NOTSTARI:
11938     case OP_NOTMINSTARI:
11939     case OP_NOTPLUSI:
11940     case OP_NOTMINPLUSI:
11941     case OP_NOTQUERYI:
11942     case OP_NOTMINQUERYI:
11943     case OP_NOTUPTOI:
11944     case OP_NOTMINUPTOI:
11945     case OP_NOTEXACTI:
11946     case OP_NOTPOSSTARI:
11947     case OP_NOTPOSPLUSI:
11948     case OP_NOTPOSQUERYI:
11949     case OP_NOTPOSUPTOI:
11950     case OP_TYPESTAR:
11951     case OP_TYPEMINSTAR:
11952     case OP_TYPEPLUS:
11953     case OP_TYPEMINPLUS:
11954     case OP_TYPEQUERY:
11955     case OP_TYPEMINQUERY:
11956     case OP_TYPEUPTO:
11957     case OP_TYPEMINUPTO:
11958     case OP_TYPEEXACT:
11959     case OP_TYPEPOSSTAR:
11960     case OP_TYPEPOSPLUS:
11961     case OP_TYPEPOSQUERY:
11962     case OP_TYPEPOSUPTO:
11963     cc = compile_iterator_matchingpath(common, cc, parent);
11964     break;
11965 
    /* The code unit found 32 bytes after the class opcode is inspected: a
    value in the OP_CRSTAR..OP_CRPOSRANGE range selects the iterator
    generator, anything else the single-item generator. */
11966     case OP_CLASS:
11967     case OP_NCLASS:
11968     if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
11969       cc = compile_iterator_matchingpath(common, cc, parent);
11970     else
11971       cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
11972     break;
11973 
11974 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    /* Same test as above, with the opcode to inspect located at the offset
    stored at cc + 1. */
11975     case OP_XCLASS:
11976     if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
11977       cc = compile_iterator_matchingpath(common, cc, parent);
11978     else
11979       cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
11980     break;
11981 #endif
11982 
    /* A reference followed by a repeat opcode goes to the reference iterator
    generator; a plain reference is compiled in place and cc is advanced by
    hand. */
11983     case OP_REF:
11984     case OP_REFI:
11985     if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
11986       cc = compile_ref_iterator_matchingpath(common, cc, parent);
11987     else
11988       {
11989       compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
11990       cc += 1 + IMM2_SIZE;
11991       }
11992     break;
11993 
    /* As for OP_REF, but the opcode carries two IMM2 values and
    compile_dnref_search() is emitted before the reference match. */
11994     case OP_DNREF:
11995     case OP_DNREFI:
11996     if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
11997       cc = compile_ref_iterator_matchingpath(common, cc, parent);
11998     else
11999       {
12000       compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
12001       compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
12002       cc += 1 + 2 * IMM2_SIZE;
12003       }
12004     break;
12005 
12006     case OP_RECURSE:
12007     cc = compile_recurse_matchingpath(common, cc, parent);
12008     break;
12009 
12010     case OP_CALLOUT:
12011     case OP_CALLOUT_STR:
12012     cc = compile_callout_matchingpath(common, cc, parent);
12013     break;
12014 
    /* Assertions get an assert_backtrack record, which is passed on to the
    assertion generator. */
12015     case OP_ASSERT:
12016     case OP_ASSERT_NOT:
12017     case OP_ASSERTBACK:
12018     case OP_ASSERTBACK_NOT:
12019     PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12020     cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12021     break;
12022 
    /* cc is moved past the end of the group that follows (bracketend) and
    no code for the group itself is emitted here. Unless the group is closed
    by OP_KETRMIN, one stack slot holding the string position is pushed;
    with OP_KETRMIN two slots are pushed, a zero and the string position. */
12023     case OP_BRAMINZERO:
12024     PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
12025     cc = bracketend(cc + 1);
12026     if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
12027       {
12028       allocate_stack(common, 1);
12029       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
12030       }
12031     else
12032       {
12033       allocate_stack(common, 2);
12034       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12035       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
12036       }
12037     BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
12038     count_match(common);
12039     break;
12040 
12041     case OP_ONCE:
12042     case OP_SCRIPT_RUN:
12043     case OP_BRA:
12044     case OP_CBRA:
12045     case OP_COND:
12046     case OP_SBRA:
12047     case OP_SCBRA:
12048     case OP_SCOND:
12049     cc = compile_bracket_matchingpath(common, cc, parent);
12050     break;
12051 
    /* When the opcode after OP_BRAZERO is greater than OP_ASSERTBACK_NOT it
    is compiled as a bracket, otherwise as an assertion. */
12052     case OP_BRAZERO:
12053     if (cc[1] > OP_ASSERTBACK_NOT)
12054       cc = compile_bracket_matchingpath(common, cc, parent);
12055     else
12056       {
12057       PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12058       cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12059       }
12060     break;
12061 
12062     case OP_BRAPOS:
12063     case OP_CBRAPOS:
12064     case OP_SBRAPOS:
12065     case OP_SCBRAPOS:
12066     case OP_BRAPOSZERO:
12067     cc = compile_bracketpos_matchingpath(common, cc, parent);
12068     break;
12069 
    /* The previous value of the mark slot is saved on the stack, then
    cc + 2 is stored both in the local mark slot and in the mark_ptr field
    of the jit_arguments block. Five stack slots are used instead of one
    when common->has_skip_arg is set. */
12070     case OP_MARK:
12071     PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
12072     SLJIT_ASSERT(common->mark_ptr != 0);
12073     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
12074     allocate_stack(common, common->has_skip_arg ? 5 : 1);
12075     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12076     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
12077     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
12078     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
12079     OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
12080     if (common->has_skip_arg)
12081       {
      /* The control head slot is pointed at STACK_TOP; slots 0..3 receive
      the previous control head, the type_mark tag, cc + 2 and the current
      string position. */
12082       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12083       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
12084       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
12085       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
12086       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
12087       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
12088       }
    /* Opcode, two further code units, and cc[1] code units of argument. */
12089     cc += 1 + 2 + cc[1];
12090     break;
12091 
12092     case OP_PRUNE:
12093     case OP_PRUNE_ARG:
12094     case OP_SKIP:
12095     case OP_SKIP_ARG:
12096     case OP_THEN:
12097     case OP_THEN_ARG:
12098     case OP_COMMIT:
12099     case OP_COMMIT_ARG:
12100     cc = compile_control_verb_matchingpath(common, cc, parent);
12101     break;
12102 
12103     case OP_FAIL:
12104     case OP_ACCEPT:
12105     case OP_ASSERT_ACCEPT:
12106     cc = compile_fail_accept_matchingpath(common, cc, parent);
12107     break;
12108 
12109     case OP_CLOSE:
12110     cc = compile_close_matchingpath(common, cc);
12111     break;
12112 
    /* The group that follows is skipped without emitting anything. */
12113     case OP_SKIPZERO:
12114     cc = bracketend(cc + 1);
12115     break;
12116 
12117     default:
12118     SLJIT_UNREACHABLE();
12119     return;
12120     }
  /* A generator reported that it could not continue. */
12121   if (cc == NULL)
12122     return;
12123   }
12124 
/* Close the then-trap opened above: a second then_trap_backtrack record is
pushed that refers to the trap, and the enclosing trap becomes current
again. */
12125 if (has_then_trap)
12126   {
12127   /* Head item on backtrack. */
12128   PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
12129   BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
12130   BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
12131   common->then_trap = save_then_trap;
12132   }
12133 SLJIT_ASSERT(cc == ccend);
12134 }
12135 
/* The helper macros used by the matching path generators are removed here;
the backtracking path generators that follow use the macros defined below. */
12136 #undef PUSH_BACKTRACK
12137 #undef PUSH_BACKTRACK_NOVALUE
12138 #undef BACKTRACK_AS
12139 
/* Calls compile_backtrackingpath() for (current) and then executes a plain
"return;" in the enclosing function when the sljit compiler reports an error.
The expansion refers to the names 'common' and 'compiler', which must be in
scope at the point of use. */
12140 #define COMPILE_BACKTRACKINGPATH(current) \
12141   do \
12142     { \
12143     compile_backtrackingpath(common, (current)); \
12144     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
12145       return; \
12146     } \
12147   while (0)
12148 
/* Casts the variable named 'current' to a pointer to the given type. */
12149 #define CURRENT_AS(type) ((type *)current)
12150 
/* Generates the backtracking path of a repeated single item.

The opcode, the item type and the repeat limits are read again from
current->cc with get_iterator_parameters(). The state saved for the iterator
is addressed through base/offset0/offset1: stack slots STACK(0) and STACK(1)
relative to STACK_TOP when PRIVATE_DATA(cc) is zero, otherwise two
consecutive words at private_data_ptr relative to SLJIT_SP. The jumps
collected in current->topbacktracks are bound to the end of the code emitted
here.

Arguments:
  common    compiler state
  current   backtrack record of the iterator; accessed as a
            char_iterator_backtrack
*/
compile_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)12151 static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12152 {
12153 DEFINE_COMPILER;
12154 PCRE2_SPTR cc = current->cc;
12155 PCRE2_UCHAR opcode;
12156 PCRE2_UCHAR type;
12157 sljit_u32 max = 0, exact;
12158 struct sljit_label *label = NULL;
12159 struct sljit_jump *jump = NULL;
12160 jump_list *jumplist = NULL;
12161 PCRE2_SPTR end;
12162 int private_data_ptr = PRIVATE_DATA(cc);
12163 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
12164 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
12165 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
12166 
12167 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
12168 
12169 switch(opcode)
12170   {
12171   case OP_STAR:
12172   case OP_UPTO:
12173   if (type == OP_ANYNL || type == OP_EXTUNI)
12174     {
    /* These item types always use the stack. A saved position is popped
    into STR_PTR; a non-zero value resumes the matching path. */
12175     SLJIT_ASSERT(private_data_ptr == 0);
12176     set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
12177     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12178     free_stack(common, 1);
12179     CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12180     }
12181   else
12182     {
12183     if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
12184       {
      /* Load the saved position (offset0) and the limit (offset1), step one
      code unit back and give up when the limit has been reached. */
12185       OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12186       OP1(SLJIT_MOV, TMP2, 0, base, offset1);
12187       OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12188 
12189       jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
      /* Loop: the code unit before STR_PTR is compared with charpos.chr
      (after OR-ing in charpos.othercasebit when that is non-zero). On a hit
      the matching path is resumed; otherwise move back and repeat while
      STR_PTR is still above the limit. */
12190       label = LABEL();
12191       OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
12192       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12193       if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
12194         OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
12195       CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12196       move_back(common, NULL, TRUE);
12197       CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
12198       }
12199     else
12200       {
      /* Give up when the saved position has reached the limit in offset1;
      otherwise move back, store the new position and resume the matching
      path. */
12201       OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12202       jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
12203       move_back(common, NULL, TRUE);
12204       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12205       JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12206       }
    /* Both variants arrive here when nothing is left to give back. */
12207     JUMPHERE(jump);
12208     if (private_data_ptr == 0)
12209       free_stack(common, 2);
12210     }
12211   break;
12212 
  /* Restore the saved position and try to match one more item. On success
  the new position is stored and the matching path is resumed; on failure
  the saved state is released. */
12213   case OP_MINSTAR:
12214   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12215   compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12216   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12217   JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12218   set_jumps(jumplist, LABEL());
12219   if (private_data_ptr == 0)
12220     free_stack(common, 1);
12221   break;
12222 
  /* As OP_MINSTAR, but the counter kept in offset1 is decremented first and
  the attempt is abandoned when it reaches zero. */
12223   case OP_MINUPTO:
12224   OP1(SLJIT_MOV, TMP1, 0, base, offset1);
12225   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12226   OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
12227   add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));
12228 
12229   OP1(SLJIT_MOV, base, offset1, TMP1, 0);
12230   compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12231   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12232   JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12233 
12234   set_jumps(jumplist, LABEL());
12235   if (private_data_ptr == 0)
12236     free_stack(common, 2);
12237   break;
12238 
  /* The saved position is loaded and the slot is cleared. A non-zero
  position resumes the matching path; zero skips to the release of the saved
  state. The jumps in u.backtracks enter a second sequence that reloads the
  slot, clears it and always resumes the matching path. */
12239   case OP_QUERY:
12240   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12241   OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12242   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12243   jump = JUMP(SLJIT_JUMP);
12244   set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
12245   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12246   OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12247   JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12248   JUMPHERE(jump);
12249   if (private_data_ptr == 0)
12250     free_stack(common, 1);
12251   break;
12252 
  /* The saved position is loaded and the slot is cleared. With a non-zero
  position one item is matched and the matching path is resumed; a zero
  position or a failed item match falls through to the release of the saved
  state. */
12253   case OP_MINQUERY:
12254   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12255   OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12256   jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
12257   compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12258   JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12259   set_jumps(jumplist, LABEL());
12260   JUMPHERE(jump);
12261   if (private_data_ptr == 0)
12262     free_stack(common, 1);
12263   break;
12264 
  /* No code is emitted for these opcodes. */
12265   case OP_EXACT:
12266   case OP_POSSTAR:
12267   case OP_POSQUERY:
12268   case OP_POSUPTO:
12269   break;
12270 
12271   default:
12272   SLJIT_UNREACHABLE();
12273   break;
12274   }
12275 
12276 set_jumps(current->topbacktracks, LABEL());
12277 }
12278 
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)12279 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12280 {
12281 DEFINE_COMPILER;
12282 PCRE2_SPTR cc = current->cc;
12283 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
12284 PCRE2_UCHAR type;
12285 
12286 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
12287 
12288 if ((type & 0x1) == 0)
12289   {
12290   /* Maximize case. */
12291   set_jumps(current->topbacktracks, LABEL());
12292   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12293   free_stack(common, 1);
12294   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12295   return;
12296   }
12297 
12298 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12299 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12300 set_jumps(current->topbacktracks, LABEL());
12301 free_stack(common, ref ? 2 : 3);
12302 }
12303 
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)12304 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12305 {
12306 DEFINE_COMPILER;
12307 recurse_entry *entry;
12308 
12309 if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
12310   {
12311   entry = CURRENT_AS(recurse_backtrack)->entry;
12312   if (entry->backtrack_label == NULL)
12313     add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
12314   else
12315     JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
12316   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
12317   }
12318 else
12319   compile_backtrackingpath(common, current->top);
12320 
12321 set_jumps(current->topbacktracks, LABEL());
12322 }
12323 
/* Generates the backtracking path of an assertion, which may be preceded by
OP_BRAZERO (OP_BRAMINZERO is asserted not to occur here).

Arguments:
  common    compiler state
  current   backtrack record of the assertion; accessed as an
            assert_backtrack
*/
compile_assert_backtrackingpath(compiler_common * common,struct backtrack_common * current)12324 static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12325 {
12326 DEFINE_COMPILER;
12327 PCRE2_SPTR cc = current->cc;
12328 PCRE2_UCHAR bra = OP_BRA;
12329 struct sljit_jump *brajump = NULL;
12330 
12331 SLJIT_ASSERT(*cc != OP_BRAMINZERO);
12332 if (*cc == OP_BRAZERO)
12333   {
12334   bra = *cc;
12335   cc++;
12336   }
12337 
/* With OP_BRAZERO the value in the top stack slot is loaded into STR_PTR. */
12338 if (bra == OP_BRAZERO)
12339   {
12340   SLJIT_ASSERT(current->topbacktracks == NULL);
12341   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12342   }
12343 
/* Negative framesize: only the OP_BRAZERO slot has to be handled. The slot is
cleared, a non-zero loaded value resumes the matching path, and otherwise the
slot is released. */
12344 if (CURRENT_AS(assert_backtrack)->framesize < 0)
12345   {
12346   set_jumps(current->topbacktracks, LABEL());
12347 
12348   if (bra == OP_BRAZERO)
12349     {
12350     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12351     CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
12352     free_stack(common, 1);
12353     }
12354   return;
12355   }
12356 
12357 if (bra == OP_BRAZERO)
12358   {
  /* Negative assertions are finished in the same way as the negative
  framesize case above. */
12359   if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
12360     {
12361     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12362     CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
12363     free_stack(common, 1);
12364     return;
12365     }
  /* Positive assertion: release the slot, and skip the code below when the
  loaded value was zero. */
12366   free_stack(common, 1);
12367   brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
12368   }
12369 
12370 if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
12371   {
  /* STACK_TOP is reloaded from the assertion's private data slot and the
  common revertframes routine is called. The word at STACK(-2) is then
  written back to the private data slot after STACK_TOP has been advanced by
  framesize - 1 words. */
12372   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
12373   add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12374   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12375   OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(assert_backtrack)->framesize - 1) * sizeof(sljit_sw));
12376   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, TMP1, 0);
12377 
12378   set_jumps(current->topbacktracks, LABEL());
12379   }
12380 else
12381   set_jumps(current->topbacktracks, LABEL());
12382 
/* With OP_BRAZERO a slot holding zero is pushed again and the matching path
is resumed; the zero-value skip taken above lands after this code. */
12383 if (bra == OP_BRAZERO)
12384   {
12385   /* We know there is enough place on the stack. */
12386   OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
12387   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12388   JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
12389   JUMPHERE(brajump);
12390   }
12391 }
12392 
/* Emits the backtracking path of a bracket group. The dispatcher in
compile_backtrackingpath calls this for OP_ONCE, OP_SCRIPT_RUN, OP_BRA,
OP_CBRA, OP_COND, OP_SBRA, OP_SCBRA and OP_SCOND, and for OP_BRAZERO when the
following opcode is not an assertion.

Arguments:
  common      compiler state shared by all code emitters
  current     backtrack record of the bracket; it is accessed through
              CURRENT_AS(bracket_backtrack)

Returns:      nothing; the function returns early (leaving the emitted code
              incomplete) when allocate_read_only_data() fails or when the
              sljit compiler reports an error after compiling an alternative

Side effects: for OP_ONCE the u.framesize field of the record is decoded in
place (shifted right by one bit), and current->top, current->topbacktracks
and current->nextbacktracks are cleared before each further alternative is
compiled. */

static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
int opcode, stacksize, alt_count, alt_max;
int offset = 0;
int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
PCRE2_SPTR cc = current->cc;
PCRE2_SPTR ccbegin;
PCRE2_SPTR ccprev;
PCRE2_UCHAR bra = OP_BRA;
PCRE2_UCHAR ket;
assert_backtrack *assert;
sljit_uw *next_update_addr = NULL;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
struct sljit_jump *brazero = NULL;
struct sljit_jump *alt1 = NULL;
struct sljit_jump *alt2 = NULL;
struct sljit_jump *once = NULL;
struct sljit_jump *cond = NULL;
struct sljit_label *rmin_label = NULL;
struct sljit_label *exact_label = NULL;

/* An optional OP_BRAZERO / OP_BRAMINZERO prefix is remembered in bra and
skipped, so that cc points at the bracket opcode itself. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  }

/* Find the closing ket. A plain OP_KET that has private data describes a
counted repeat: the counter slot, the repeat type and the repeat count are
read from the private data entries at ccbegin, ccbegin + 2 and ccbegin + 3.
OP_UPTO and OP_MINUPTO repeats are then treated like OP_KETRMAX and
OP_KETRMIN respectively. */
opcode = *cc;
ccbegin = bracketend(cc) - 1 - LINK_SIZE;
ket = *ccbegin;
if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
  {
  repeat_ptr = PRIVATE_DATA(ccbegin);
  repeat_type = PRIVATE_DATA(ccbegin + 2);
  repeat_count = PRIVATE_DATA(ccbegin + 3);
  SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }
/* From here on ccbegin is the bracket opcode and cc is advanced by the link
value stored after it; finding OP_ALT there means further alternatives. */
ccbegin = cc;
cc += GET(cc, 1);
has_alternatives = *cc == OP_ALT;
/* For conditional groups the flag is recomputed: it is set when the condition
is an assertion opcode or when condfailed jumps were recorded. */
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
/* For capturing brackets offset is the value read by GET2 after the link,
doubled; it is used below as the OVECTOR index of the group. */
if (opcode == OP_CBRA || opcode == OP_SCBRA)
  offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
/* A repeated OP_COND is handled as OP_SCOND. */
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;

alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;

/* Decoding the needs_control_head in framesize. */
if (opcode == OP_ONCE)
  {
  needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
  CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
  }

/* Counted repeat (OP_UPTO / OP_MINUPTO): pop the saved counter from the stack
and write it back to repeat_ptr, plus one in the OP_UPTO case. */
if (ket != OP_KET && repeat_type != 0)
  {
  /* TMP1 is used in OP_KETRMIN below. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  if (repeat_type == OP_UPTO)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  }

if (ket == OP_KETRMAX)
  {
  if (bra == OP_BRAZERO)
    {
    /* Pop the top stack value; zero skips to the brazero target that is
    resolved at the end of this function. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
    }
  }
else if (ket == OP_KETRMIN)
  {
  if (bra != OP_BRAMINZERO)
    {
    /* Reload STR_PTR from the stack and decide whether to re-enter the group
    through recursive_matchingpath. */
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    if (repeat_type != 0)
      {
      /* TMP1 was set a few lines above. */
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
      {
      /* Checking zero-length iteration. */
      if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      else
        {
        /* With a frame, the value to compare against is read relative to the
        address held in private_data_ptr. */
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
        }
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else
      JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
    }
  /* Target of the loop-back jump emitted in the OP_KETRMIN branch at the end
  of this function. */
  rmin_label = LABEL();
  if (repeat_type != 0)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  }
else if (bra == OP_BRAZERO)
  {
  /* Non-repeated group with OP_BRAZERO: pop the top stack value; non-zero
  skips to the brazero target resolved at the end of this function. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  }
else if (repeat_type == OP_EXACT)
  {
  /* The counter restarts at 1; exact_label is the loop-back target used by
  the OP_EXACT branch at the end of this function. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  exact_label = LABEL();
  }

/* Capturing group: pop the values saved on the stack back into the OVECTOR
pair of the group (and into capture_last_ptr when that slot is in use).
Nothing is popped here for an optimized capturing bracket unless
capture_last_ptr is in use. */
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    {
    SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
    free_stack(common, 3);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
    }
  else if (common->optimized_cbracket[offset >> 1] == 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  }

/* Select how the backtracking code of the individual alternatives is reached. */
if (SLJIT_UNLIKELY(opcode == OP_ONCE))
  {
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    /* Reload STACK_TOP from private_data_ptr, call the shared revertframes
    routine, and step STACK_TOP over framesize - 1 further words. */
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
    }
  /* This jump is resolved by JUMPHERE(once) in the OP_ONCE clean-up code near
  the end of this function. */
  once = JUMP(SLJIT_JUMP);
  }
else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  if (has_alternatives)
    {
    /* Always exactly one alternative. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);

    alt_max = 2;
    alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
    }
  }
else if (has_alternatives)
  {
  /* TMP1 receives the alternative index popped from the stack. The values
  stored there are multiples of sizeof(sljit_uw) (see alt_count below). */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);

  if (alt_max > 4)
    {
    /* Table jump if alt_max is greater than 4. */
    next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
    if (SLJIT_UNLIKELY(next_update_addr == NULL))
      return;
    sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
    add_label_addr(common, next_update_addr++);
    }
  else
    {
    /* Up to four alternatives are dispatched by comparisons; alt1 and alt2
    are resolved (and possibly re-armed) inside the alternative loop below. */
    if (alt_max == 4)
      alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
    alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
    }
  }

/* Backtracking code for the records chained at current->top, followed by the
landing point of this record's own topbacktracks jumps. */
COMPILE_BACKTRACKINGPATH(current->top);
if (current->topbacktracks)
  set_jumps(current->topbacktracks, LABEL());

if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  /* Conditional block always has at most one alternative. */
  if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
    {
    SLJIT_ASSERT(has_alternatives);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    /* Positive assertion condition with a frame: revert the frame and store
    the value read from STACK(-2) back into the assertion's private data. */
    if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
      }
    /* cond jumps over the alternative code emitted below; the condfailed
    jumps land here instead. */
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
    }
  else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
    {
    SLJIT_ASSERT(has_alternatives);
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
    }
  else
    SLJIT_ASSERT(!has_alternatives);
  }

/* For each remaining alternative emit, in this order: its matching path, the
stack record pushed when it matches, the jump to alternative_matchingpath, the
dispatch entry of its backtracking code, and that backtracking code itself. */
if (has_alternatives)
  {
  alt_count = sizeof(sljit_uw);
  do
    {
    current->top = NULL;
    current->topbacktracks = NULL;
    current->nextbacktracks = NULL;
    /* Conditional blocks always have an additional alternative, even if it is empty. */
    if (*cc == OP_ALT)
      {
      ccprev = cc + 1 + LINK_SIZE;
      cc += GET(cc, 1);
      /* Reload STR_PTR for the new alternative; where it comes from depends
      on the bracket type. */
      if (opcode != OP_COND && opcode != OP_SCOND)
        {
        if (opcode != OP_ONCE)
          {
          if (private_data_ptr != 0)
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
          else
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
          }
        else
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
        }
      compile_matchingpath(common, ccprev, cc, current);
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
        return;

      if (opcode == OP_SCRIPT_RUN)
        match_script_run_common(common, private_data_ptr, current);
      }

    /* Instructions after the current alternative is successfully matched. */
    /* There is a similar code in compile_bracket_matchingpath. */
    if (opcode == OP_ONCE)
      match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

    /* First pass: count the stack slots needed. The second pass below fills
    them in using the same sequence of conditions. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* We need to preserve the counter. TMP2 will be used below. */
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
      stacksize++;
      }
    if (ket != OP_KET || bra != OP_BRA)
      stacksize++;
    if (offset != 0)
      {
      if (common->capture_last_ptr != 0)
        stacksize++;
      if (common->optimized_cbracket[offset >> 1] == 0)
        stacksize += 2;
      }
    if (opcode != OP_ONCE)
      stacksize++;

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    /* Second pass: stacksize now serves as the index of the next slot to fill. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* TMP2 was set above. */
      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
      stacksize++;
      }

    if (ket != OP_KET || bra != OP_BRA)
      {
      /* Repeated groups store STR_PTR here; a non-repeated group with a
      zero-repeat prefix stores 0. */
      if (ket != OP_KET)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      else
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
      stacksize++;
      }

    if (offset != 0)
      stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

    /* Record which alternative matched; this is the value read into TMP1 by
    the dispatch code above. */
    if (opcode != OP_ONCE)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);

    if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
      {
      /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
      SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      }

    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);

    /* Entry point of this alternative's backtracking code: either the next
    jump table slot, or the pending alt1 / alt2 comparison jump. Where more
    alternatives may follow, a new comparison is emitted for them. */
    if (opcode != OP_ONCE)
      {
      if (alt_max > 4)
        add_label_addr(common, next_update_addr++);
      else
        {
        if (alt_count != 2 * sizeof(sljit_uw))
          {
          JUMPHERE(alt1);
          if (alt_max == 3 && alt_count == sizeof(sljit_uw))
            alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
          }
        else
          {
          JUMPHERE(alt2);
          if (alt_max == 4)
            alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
          }
        }
      alt_count += sizeof(sljit_uw);
      }

    COMPILE_BACKTRACKINGPATH(current->top);
    if (current->topbacktracks)
      set_jumps(current->topbacktracks, LABEL());
    SLJIT_ASSERT(!current->nextbacktracks);
    }
  while (*cc == OP_ALT);

  if (cond != NULL)
    {
    SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    /* Negative assertion condition with a frame: revert the frame, as is done
    for the positive case before the alternative loop. */
    if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
      }
    JUMPHERE(cond);
    }

  /* Free the STR_PTR. */
  if (private_data_ptr == 0)
    free_stack(common, 1);
  }

/* All alternatives have failed: restore the state that was saved when the
group was entered. */
if (offset != 0)
  {
  /* Using both tmp register is better for instruction scheduling. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }
else if (opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* These bracket types keep one saved word for private_data_ptr on the stack. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  }
else if (opcode == OP_ONCE)
  {
  cc = ccbegin + GET(ccbegin, 1);
  /* Work out how many stack words this OP_ONCE group still owns. */
  stacksize = needs_control_head ? 1 : 0;

  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    /* Reset head and drop saved frame. */
    stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
    }
  else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
    {
    /* The STR_PTR must be released. */
    stacksize++;
    }

  if (stacksize > 0)
    free_stack(common, stacksize);

  /* Landing point of the jump emitted when the OP_ONCE backtracking path was
  entered above. */
  JUMPHERE(once);
  /* Restore previous private_data_ptr */
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
  else if (ket == OP_KETRMIN)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    /* See the comment below. */
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }

/* Finally, loop back for repeated groups or continue with the zero-repeat
matching path. */
if (repeat_type == OP_EXACT)
  {
  /* Increment the counter and repeat from exact_label while it does not
  exceed repeat_count. */
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
  }
else if (ket == OP_KETRMAX)
  {
  /* Reload the saved STR_PTR; a non-zero value re-enters the group through
  recursive_matchingpath. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (bra != OP_BRAZERO)
    free_stack(common, 1);

  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
  if (bra == OP_BRAZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
    JUMPHERE(brazero);
    free_stack(common, 1);
    }
  }
else if (ket == OP_KETRMIN)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  /* OP_ONCE removes everything in case of a backtrack, so we don't
  need to explicitly release the STR_PTR. The extra release would
  affect badly the free_stack(2) above. */
  if (opcode != OP_ONCE)
    free_stack(common, 1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
  if (opcode == OP_ONCE)
    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
  else if (bra == OP_BRAMINZERO)
    free_stack(common, 1);
  }
else if (bra == OP_BRAZERO)
  {
  /* Reload STR_PTR and continue with the zero-repeat matching path; the
  brazero jump taken at the top of this function lands after that jump. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
  JUMPHERE(brazero);
  }
}
12860 
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)12861 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12862 {
12863 DEFINE_COMPILER;
12864 int offset;
12865 struct sljit_jump *jump;
12866 
12867 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
12868   {
12869   if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
12870     {
12871     offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
12872     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12873     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12874     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12875     if (common->capture_last_ptr != 0)
12876       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12877     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12878     if (common->capture_last_ptr != 0)
12879       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12880     }
12881   set_jumps(current->topbacktracks, LABEL());
12882   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
12883   return;
12884   }
12885 
12886 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
12887 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12888 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
12889 
12890 if (current->topbacktracks)
12891   {
12892   jump = JUMP(SLJIT_JUMP);
12893   set_jumps(current->topbacktracks, LABEL());
12894   /* Drop the stack frame. */
12895   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
12896   JUMPHERE(jump);
12897   }
12898 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
12899 }
12900 
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)12901 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12902 {
12903 assert_backtrack backtrack;
12904 
12905 current->top = NULL;
12906 current->topbacktracks = NULL;
12907 current->nextbacktracks = NULL;
12908 if (current->cc[1] > OP_ASSERTBACK_NOT)
12909   {
12910   /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
12911   compile_bracket_matchingpath(common, current->cc, current);
12912   compile_bracket_backtrackingpath(common, current->top);
12913   }
12914 else
12915   {
12916   memset(&backtrack, 0, sizeof(backtrack));
12917   backtrack.common.cc = current->cc;
12918   backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
12919   /* Manual call of compile_assert_matchingpath. */
12920   compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
12921   }
12922 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
12923 }
12924 
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)12925 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12926 {
12927 DEFINE_COMPILER;
12928 PCRE2_UCHAR opcode = *current->cc;
12929 struct sljit_label *loop;
12930 struct sljit_jump *jump;
12931 
12932 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
12933   {
12934   if (common->then_trap != NULL)
12935     {
12936     SLJIT_ASSERT(common->control_head_ptr != 0);
12937 
12938     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12939     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
12940     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
12941     jump = JUMP(SLJIT_JUMP);
12942 
12943     loop = LABEL();
12944     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12945     JUMPHERE(jump);
12946     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
12947     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
12948     add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
12949     return;
12950     }
12951   else if (!common->local_quit_available && common->in_positive_assertion)
12952     {
12953     add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
12954     return;
12955     }
12956   }
12957 
12958 if (common->local_quit_available)
12959   {
12960   /* Abort match with a fail. */
12961   if (common->quit_label == NULL)
12962     add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
12963   else
12964     JUMPTO(SLJIT_JUMP, common->quit_label);
12965   return;
12966   }
12967 
12968 if (opcode == OP_SKIP_ARG)
12969   {
12970   SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
12971   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12972   OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
12973   sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
12974 
12975   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
12976   add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
12977   return;
12978   }
12979 
12980 if (opcode == OP_SKIP)
12981   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12982 else
12983   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
12984 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
12985 }
12986 
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)12987 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12988 {
12989 DEFINE_COMPILER;
12990 struct sljit_jump *jump;
12991 int size;
12992 
12993 if (CURRENT_AS(then_trap_backtrack)->then_trap)
12994   {
12995   common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
12996   return;
12997   }
12998 
12999 size = CURRENT_AS(then_trap_backtrack)->framesize;
13000 size = 3 + (size < 0 ? 0 : size);
13001 
13002 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
13003 free_stack(common, size);
13004 jump = JUMP(SLJIT_JUMP);
13005 
13006 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
13007 /* STACK_TOP is set by THEN. */
13008 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
13009   {
13010   add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13011   OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
13012   }
13013 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13014 free_stack(common, 3);
13015 
13016 JUMPHERE(jump);
13017 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
13018 }
13019 
/* Emits the backtracking code for a chain of backtrack records. The chain is
walked from 'current' through the prev links; for each record the pending
nextbacktracks jumps are bound to the current position and the emitter that
belongs to the opcode at record->cc is invoked.

Arguments:
  common      compiler state; common->then_trap is saved on entry and
              restored before returning
  current     first record of the chain (may be NULL) */

static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *saved_then_trap = common->then_trap;
struct backtrack_common *record;

for (record = current; record != NULL; record = record->prev)
  {
  if (record->nextbacktracks != NULL)
    set_jumps(record->nextbacktracks, LABEL());

  switch(*record->cc)
    {
    /* Restore the saved start of match. */
    case OP_SET_SOM:
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Single item repeats and character classes. */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, record);
    break;

    /* Back references. */
    case OP_REF:
    case OP_REFI:
    case OP_DNREF:
    case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, record);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, record);
    break;

    /* Assertions. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, record);
    break;

    /* Ordinary brackets. */
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    compile_bracket_backtrackingpath(common, record);
    break;

    /* The opcode that follows OP_BRAZERO decides which emitter is used. */
    case OP_BRAZERO:
    if (record->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, record);
    else
      compile_bracket_backtrackingpath(common, record);
    break;

    /* Possessive brackets. */
    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, record);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, record);
    break;

    /* Restore the previous mark; when has_skip_arg is set five words were
    saved and the control head is restored as well. */
    case OP_MARK:
    if (common->has_skip_arg)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    /* Backtracking control verbs. */
    case OP_THEN:
    case OP_THEN_ARG:
    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, record);
    break;

    /* Leave through the quit path; without a local quit the return value is
    set to PCRE2_ERROR_NOMATCH first. */
    case OP_COMMIT:
    case OP_COMMIT_ARG:
    if (!common->local_quit_available)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
    if (common->quit_label != NULL)
      JUMPTO(SLJIT_JUMP, common->quit_label);
    else
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    break;

    /* Only the topbacktracks jumps have to be bound for these opcodes. */
    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    set_jumps(record->topbacktracks, LABEL());
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, record);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = saved_then_trap;
}
13208 
/* Emit the JIT code for the recursion target described by
   common->currententry, i.e. the bracket group found at
   common->start + currententry->start.

   Two fast-call entry points are generated:
     - currententry->entry_label: starts matching the group;
     - currententry->backtrack_label: re-enters the group on the backtracking
       path.
   Every exit is an sljit fast return, with TMP1 loaded just before it:
     - 1 on the exit reached after an alternative matched or an accept jump
       was taken;
     - 0 on the exit taken when no alternative matched (also used by the quit
       jumps) and on the exit taken when the backtrack entry finds the accept
       marker.
   How the calling code interprets TMP1 is not visible in this function.

   The function returns early, with the code only partly emitted, when the
   sljit compiler reports an error or when the jump table for the
   alternatives cannot be allocated. */
compile_recurse(compiler_common * common)13209 static SLJIT_INLINE void compile_recurse(compiler_common *common)
13210 {
13211 DEFINE_COMPILER;
/* cc: the opening opcode of the group that is the target of the recursion. */
13212 PCRE2_SPTR cc = common->start + common->currententry->start;
/* ccbegin: start of the first alternative. The opcode and its link are
   skipped, plus an IMM2 field for every opener other than OP_BRA. */
13213 PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
/* ccend: bracketend(cc) stepped back by 1 + LINK_SIZE.
   NOTE(review): presumably the closing KET of the group - confirm against
   bracketend(). */
13214 PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
/* The three flags below are filled in by get_recurse_data_length(). */
13215 BOOL needs_control_head;
13216 BOOL has_quit;
13217 BOOL has_accept;
13218 int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head, &has_quit, &has_accept);
13219 int alt_count, alt_max, local_size;
13220 backtrack_common altbacktrack;
/* Jumps taken when an alternative matched; they are all bound to the common
   match exit at the end of the function. */
13221 jump_list *match = NULL;
/* Next slot of the jump table to be registered (only used when alt_max > 4). */
13222 sljit_uw *next_update_addr = NULL;
/* Pending compare-and-branch jumps of the dispatch used when alt_max <= 4. */
13223 struct sljit_jump *alt1 = NULL;
13224 struct sljit_jump *alt2 = NULL;
13225 struct sljit_jump *accept_exit = NULL;
13226 struct sljit_label *quit;
13227 
13228 /* Recurse captures then: no then trap is active when the recursion starts. */
13229 common->then_trap = NULL;
13230 
13231 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
13232 
/* alt_count is advanced by sizeof(sljit_uw) after every alternative (see the
   end of the loop below), so it is a byte offset into a table of sljit_uw
   entries rather than a plain index. */
13233 alt_max = no_alternatives(cc);
13234 alt_count = 0;
13235 
13236 /* Matching path. */
13237 SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
13238 common->currententry->entry_label = LABEL();
/* Bind the calls to this recursion that were emitted before the label existed. */
13239 set_jumps(common->currententry->entry_calls, common->currententry->entry_label);
13240 
/* Fast-call entry: the return address is received in TMP2. */
13241 sljit_emit_fast_enter(compiler, TMP2, 0);
13242 count_match(common);
13243 
/* One local slot holds the return address. With several alternatives a second
   slot, STACK(0), keeps STR_PTR so that every alternative can restart from the
   same subject position. */
13244 local_size = (alt_max > 1) ? 2 : 1;
13245 
13246 /* (Reversed) stack layout:
13247    [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */
13248 
13249 allocate_stack(common, private_data_size + local_size);
13250 /* Save return address. */
13251 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);
13252 
13253 copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, has_quit);
13254 
13255 /* This variable is saved and restored every time we enter or exit a recursive context.
   It records the STACK_TOP of the frame that was just set up. */
13256 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
13257 
13258 if (needs_control_head)
13259   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13260 
13261 if (alt_max > 1)
13262   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
13263 
/* Start from empty quit/accept labels and jump lists, so that the checks after
   the loop only see jumps collected while compiling this recursion. */
13264 memset(&altbacktrack, 0, sizeof(backtrack_common));
13265 common->quit_label = NULL;
13266 common->accept_label = NULL;
13267 common->quit = NULL;
13268 common->accept = NULL;
13269 altbacktrack.cc = ccbegin;
/* Advance cc by the link stored after the opcode. The loop treats cc as the
   end of the current alternative and tests it for OP_ALT. */
13270 cc += GET(cc, 1);
13271 while (1)
13272   {
13273   altbacktrack.top = NULL;
13274   altbacktrack.topbacktracks = NULL;
13275 
  /* Every alternative after the first one restarts from the saved STR_PTR. */
13276   if (altbacktrack.cc != ccbegin)
13277     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13278 
13279   compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
13280   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13281     return;
13282 
  /* The alternative matched. Push the frame inspected by the backtrack entry:
     STACK(0) receives the recursive head (TMP2, stored at the common match
     exit) and, when needed, STACK(1) receives the offset of the alternative
     that matched. */
13283   allocate_stack(common, (alt_max > 1 || has_accept) ? 2 : 1);
13284   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13285 
13286   if (alt_max > 1 || has_accept)
13287     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
13288 
13289   add_jump(compiler, &match, JUMP(SLJIT_JUMP));
13290 
  /* The code emitted from here on belongs to the backtracking path. The entry
     sequence and the dispatch on the stored offset are emitted once, in front
     of the backtracking code of the first alternative. */
13291   if (alt_count == 0)
13292     {
13293     /* Backtracking path entry. */
13294     SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
13295     common->currententry->backtrack_label = LABEL();
13296     set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);
13297 
    /* Fast-call entry: the return address is received in TMP1. */
13298     sljit_emit_fast_enter(compiler, TMP1, 0);
13299 
    /* A frame whose STACK(1) equals alt_max * sizeof(sljit_sw) is handled by
       the accept_exit code near the end of this function. NOTE(review): this
       presumably equals the alt_count value stored by the accept path after
       the loop - confirm that alt_max alternatives are always compiled. */
13300     if (has_accept)
13301       accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_max * sizeof (sljit_sw));
13302 
    /* TMP2 = the value stored at STACK(0) by the match exit, used below as the
       base address of the frame that holds the return address slot. */
13303     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13304     /* Save return address. */
13305     OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);
13306 
13307     copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
13308 
13309     if (alt_max > 1)
13310       {
      /* TMP1 = offset of the alternative that matched; it selects the
         backtracking code to resume. */
13311       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13312       free_stack(common, 2);
13313 
13314       if (alt_max > 4)
13315         {
13316           /* Table jump if alt_max is greater than 4. The table has one
             sljit_uw slot per alternative and is indexed by the byte offset in
             TMP1. Each slot is registered with add_label_addr() and receives
             its label address after code generation (see the label_addrs loop
             in jit_compile). */
13317           next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
13318           if (SLJIT_UNLIKELY(next_update_addr == NULL))
13319             return;
13320           sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
13321           add_label_addr(common, next_update_addr++);
13322         }
13323       else
13324         {
        /* Two to four alternatives: dispatch with compare-and-branch
           instructions instead of a table. The pending jumps are kept in alt1
           and alt2 and bound when the matching alternative is reached in a
           later iteration. */
13325         if (alt_max == 4)
13326           alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
13327         alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
13328         }
13329       }
13330     else
13331       free_stack(common, has_accept ? 2 : 1);
13332     }
13333   else if (alt_max > 4)
13334     add_label_addr(common, next_update_addr++);
13335   else
13336     {
    /* Bind the pending jump that targets this alternative and, where another
       alternative still has to be distinguished, emit the next comparison. */
13337     if (alt_count != 2 * sizeof(sljit_uw))
13338       {
13339       JUMPHERE(alt1);
13340       if (alt_max == 3 && alt_count == sizeof(sljit_uw))
13341         alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
13342       }
13343     else
13344       {
13345       JUMPHERE(alt2);
13346       if (alt_max == 4)
13347         alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
13348       }
13349     }
13350 
13351   alt_count += sizeof(sljit_uw);
13352 
13353   compile_backtrackingpath(common, altbacktrack.top);
13354   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13355     return;
  /* Failures of this alternative continue with whatever is emitted next: the
     following alternative, or the no-match exit after the loop. */
13356   set_jumps(altbacktrack.topbacktracks, LABEL());
13357 
13358   if (*cc != OP_ALT)
13359     break;
13360 
13361   altbacktrack.cc = cc + 1 + LINK_SIZE;
13362   cc += GET(cc, 1);
13363   }
13364 
13365 /* No alternative is matched. */
13366 
13367 quit = LABEL();
13368 
13369 copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, has_quit);
13370 
/* Reload the return address, release the whole frame and return with
   TMP1 = 0. */
13371 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
13372 free_stack(common, private_data_size + local_size);
13373 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
13374 sljit_emit_fast_return(compiler, TMP2, 0);
13375 
/* Target of the jumps collected in common->quit while compiling the body:
   reset STACK_TOP to the frame recorded in recursive_head_ptr, copy the data
   back and leave through the no-match exit above. */
13376 if (common->quit != NULL)
13377   {
13378   SLJIT_ASSERT(has_quit);
13379 
13380   set_jumps(common->quit, LABEL());
13381   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13382   copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, has_quit);
13383   JUMPTO(SLJIT_JUMP, quit);
13384   }
13385 
/* The backtrack entry found the accept marker in STACK(1). TMP1 still holds the
   return address received by that entry's fast enter. Drop the two-slot frame,
   copy the data back, release the recursion frame and return with TMP1 = 0. */
13386 if (has_accept)
13387   {
13388   JUMPHERE(accept_exit);
13389   free_stack(common, 2);
13390 
13391   /* Save return address. */
13392   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);
13393 
13394   copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, has_quit);
13395 
13396   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
13397   free_stack(common, private_data_size + local_size);
13398   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
13399   sljit_emit_fast_return(compiler, TMP2, 0);
13400   }
13401 
/* Target of the jumps collected in common->accept while compiling the body:
   rebuild the two-slot frame as if an alternative had matched, but store the
   final alt_count (past the last alternative) in STACK(1). No jump is emitted
   here, so execution falls through into the match exit below. */
13402 if (common->accept != NULL)
13403   {
13404   SLJIT_ASSERT(has_accept);
13405 
13406   set_jumps(common->accept, LABEL());
13407 
13408   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13409   OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
13410 
13411   allocate_stack(common, 2);
13412   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
13413   }
13414 
/* Common match exit. TMP2 holds the recursive head on every path that reaches
   this point; it is saved at STACK(0) for the backtrack entry. */
13415 set_jumps(match, LABEL());
13416 
13417 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
13418 
13419 copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
13420 
/* Fetch the return address from the frame addressed by TMP2 and return with
   TMP1 = 1. */
13421 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
13422 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
13423 sljit_emit_fast_return(compiler, TMP2, 0);
13424 }
13425 
/* Remove the COMPILE_BACKTRACKINGPATH and CURRENT_AS helper macros so that
   they are not visible to the code below. */
13426 #undef COMPILE_BACKTRACKINGPATH
13427 #undef CURRENT_AS
13428 
/* Bits of the jit_compile() mode argument that configure code generation
   instead of selecting a match mode. jit_compile() reads PCRE2_JIT_INVALID_UTF
   from mode when SUPPORT_UNICODE is defined, then clears every bit of this
   mask from mode before comparing it with the PCRE2_JIT_COMPLETE /
   PCRE2_JIT_PARTIAL_* values. */
13429 #define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS \
13430   (PCRE2_JIT_INVALID_UTF)
13431 
jit_compile(pcre2_code * code,sljit_u32 mode)13432 static int jit_compile(pcre2_code *code, sljit_u32 mode)
13433 {
13434 pcre2_real_code *re = (pcre2_real_code *)code;
13435 struct sljit_compiler *compiler;
13436 backtrack_common rootbacktrack;
13437 compiler_common common_data;
13438 compiler_common *common = &common_data;
13439 const sljit_u8 *tables = re->tables;
13440 void *allocator_data = &re->memctl;
13441 int private_data_size;
13442 PCRE2_SPTR ccend;
13443 executable_functions *functions;
13444 void *executable_func;
13445 sljit_uw executable_size;
13446 sljit_uw total_length;
13447 label_addr_list *label_addr;
13448 struct sljit_label *mainloop_label = NULL;
13449 struct sljit_label *continue_match_label;
13450 struct sljit_label *empty_match_found_label = NULL;
13451 struct sljit_label *empty_match_backtrack_label = NULL;
13452 struct sljit_label *reset_match_label;
13453 struct sljit_label *quit_label;
13454 struct sljit_jump *jump;
13455 struct sljit_jump *minlength_check_failed = NULL;
13456 struct sljit_jump *reqbyte_notfound = NULL;
13457 struct sljit_jump *empty_match = NULL;
13458 struct sljit_jump *end_anchor_failed = NULL;
13459 
13460 SLJIT_ASSERT(tables);
13461 
13462 memset(&rootbacktrack, 0, sizeof(backtrack_common));
13463 memset(common, 0, sizeof(compiler_common));
13464 common->re = re;
13465 common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
13466 rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
13467 
13468 #ifdef SUPPORT_UNICODE
13469 common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
13470 #endif /* SUPPORT_UNICODE */
13471 mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;
13472 
13473 common->start = rootbacktrack.cc;
13474 common->read_only_data_head = NULL;
13475 common->fcc = tables + fcc_offset;
13476 common->lcc = (sljit_sw)(tables + lcc_offset);
13477 common->mode = mode;
13478 common->might_be_empty = re->minlength == 0;
13479 common->nltype = NLTYPE_FIXED;
13480 switch(re->newline_convention)
13481   {
13482   case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
13483   case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
13484   case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
13485   case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
13486   case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
13487   case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
13488   default: return PCRE2_ERROR_INTERNAL;
13489   }
13490 common->nlmax = READ_CHAR_MAX;
13491 common->nlmin = 0;
13492 if (re->bsr_convention == PCRE2_BSR_UNICODE)
13493   common->bsr_nltype = NLTYPE_ANY;
13494 else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
13495   common->bsr_nltype = NLTYPE_ANYCRLF;
13496 else
13497   {
13498 #ifdef BSR_ANYCRLF
13499   common->bsr_nltype = NLTYPE_ANYCRLF;
13500 #else
13501   common->bsr_nltype = NLTYPE_ANY;
13502 #endif
13503   }
13504 common->bsr_nlmax = READ_CHAR_MAX;
13505 common->bsr_nlmin = 0;
13506 common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
13507 common->ctypes = (sljit_sw)(tables + ctypes_offset);
13508 common->name_count = re->name_count;
13509 common->name_entry_size = re->name_entry_size;
13510 common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
13511 common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
13512 #ifdef SUPPORT_UNICODE
13513 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
13514 common->utf = (re->overall_options & PCRE2_UTF) != 0;
13515 common->use_ucp = (re->overall_options & PCRE2_UCP) != 0;
13516 if (common->utf)
13517   {
13518   if (common->nltype == NLTYPE_ANY)
13519     common->nlmax = 0x2029;
13520   else if (common->nltype == NLTYPE_ANYCRLF)
13521     common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13522   else
13523     {
13524     /* We only care about the first newline character. */
13525     common->nlmax = common->newline & 0xff;
13526     }
13527 
13528   if (common->nltype == NLTYPE_FIXED)
13529     common->nlmin = common->newline & 0xff;
13530   else
13531     common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13532 
13533   if (common->bsr_nltype == NLTYPE_ANY)
13534     common->bsr_nlmax = 0x2029;
13535   else
13536     common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13537   common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13538   }
13539 else
13540   common->invalid_utf = FALSE;
13541 #endif /* SUPPORT_UNICODE */
13542 ccend = bracketend(common->start);
13543 
13544 /* Calculate the local space size on the stack. */
13545 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
13546 common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
13547 if (!common->optimized_cbracket)
13548   return PCRE2_ERROR_NOMEMORY;
13549 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
13550 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13551 #else
13552 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
13553 #endif
13554 
13555 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
13556 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
13557 common->capture_last_ptr = common->ovector_start;
13558 common->ovector_start += sizeof(sljit_sw);
13559 #endif
13560 if (!check_opcode_types(common, common->start, ccend))
13561   {
13562   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13563   return PCRE2_ERROR_NOMEMORY;
13564   }
13565 
13566 /* Checking flags and updating ovector_start. */
13567 if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13568   {
13569   common->req_char_ptr = common->ovector_start;
13570   common->ovector_start += sizeof(sljit_sw);
13571   }
13572 if (mode != PCRE2_JIT_COMPLETE)
13573   {
13574   common->start_used_ptr = common->ovector_start;
13575   common->ovector_start += sizeof(sljit_sw);
13576   if (mode == PCRE2_JIT_PARTIAL_SOFT)
13577     {
13578     common->hit_start = common->ovector_start;
13579     common->ovector_start += sizeof(sljit_sw);
13580     }
13581   }
13582 if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
13583   {
13584   common->match_end_ptr = common->ovector_start;
13585   common->ovector_start += sizeof(sljit_sw);
13586   }
13587 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
13588 common->control_head_ptr = 1;
13589 #endif
13590 if (common->control_head_ptr != 0)
13591   {
13592   common->control_head_ptr = common->ovector_start;
13593   common->ovector_start += sizeof(sljit_sw);
13594   }
13595 if (common->has_set_som)
13596   {
13597   /* Saving the real start pointer is necessary. */
13598   common->start_ptr = common->ovector_start;
13599   common->ovector_start += sizeof(sljit_sw);
13600   }
13601 
13602 /* Aligning ovector to even number of sljit words. */
13603 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
13604   common->ovector_start += sizeof(sljit_sw);
13605 
13606 if (common->start_ptr == 0)
13607   common->start_ptr = OVECTOR(0);
13608 
13609 /* Capturing brackets cannot be optimized if callouts are allowed. */
13610 if (common->capture_last_ptr != 0)
13611   memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13612 
13613 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
13614 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
13615 
13616 total_length = ccend - common->start;
13617 common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
13618 if (!common->private_data_ptrs)
13619   {
13620   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13621   return PCRE2_ERROR_NOMEMORY;
13622   }
13623 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
13624 
13625 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
13626 set_private_data_ptrs(common, &private_data_size, ccend);
13627 if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13628   {
13629   if (!detect_fast_forward_skip(common, &private_data_size) && !common->has_skip_in_assert_back)
13630     detect_fast_fail(common, common->start, &private_data_size, 4);
13631   }
13632 
13633 SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr);
13634 
13635 if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
13636   {
13637   SLJIT_FREE(common->private_data_ptrs, allocator_data);
13638   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13639   return PCRE2_ERROR_NOMEMORY;
13640   }
13641 
13642 if (common->has_then)
13643   {
13644   common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
13645   memset(common->then_offsets, 0, total_length);
13646   set_then_offsets(common, common->start, NULL);
13647   }
13648 
13649 compiler = sljit_create_compiler(allocator_data);
13650 if (!compiler)
13651   {
13652   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13653   SLJIT_FREE(common->private_data_ptrs, allocator_data);
13654   return PCRE2_ERROR_NOMEMORY;
13655   }
13656 common->compiler = compiler;
13657 
13658 /* Main pcre_jit_exec entry. */
13659 sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size);
13660 
13661 /* Register init. */
13662 reset_ovector(common, (re->top_bracket + 1) * 2);
13663 if (common->req_char_ptr != 0)
13664   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
13665 
13666 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
13667 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
13668 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13669 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
13670 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
13671 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
13672 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
13673 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
13674 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
13675 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
13676 
13677 if (common->fast_fail_start_ptr < common->fast_fail_end_ptr)
13678   reset_fast_fail(common);
13679 
13680 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13681   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13682 if (common->mark_ptr != 0)
13683   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
13684 if (common->control_head_ptr != 0)
13685   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13686 
13687 /* Main part of the matching */
13688 if ((re->overall_options & PCRE2_ANCHORED) == 0)
13689   {
13690   mainloop_label = mainloop_entry(common);
13691   continue_match_label = LABEL();
13692   /* Forward search if possible. */
13693   if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13694     {
13695     if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
13696       ;
13697     else if ((re->flags & PCRE2_FIRSTSET) != 0)
13698       fast_forward_first_char(common);
13699     else if ((re->flags & PCRE2_STARTLINE) != 0)
13700       fast_forward_newline(common);
13701     else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
13702       fast_forward_start_bits(common);
13703     }
13704   }
13705 else
13706   continue_match_label = LABEL();
13707 
13708 if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13709   {
13710   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13711   OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
13712   minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
13713   }
13714 if (common->req_char_ptr != 0)
13715   reqbyte_notfound = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
13716 
13717 /* Store the current STR_PTR in OVECTOR(0). */
13718 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
13719 /* Copy the limit of allowed recursions. */
13720 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
13721 if (common->capture_last_ptr != 0)
13722   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
13723 if (common->fast_forward_bc_ptr != NULL)
13724   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1), STR_PTR, 0);
13725 
13726 if (common->start_ptr != OVECTOR(0))
13727   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
13728 
13729 /* Copy the beginning of the string. */
13730 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13731   {
13732   jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13733   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
13734   JUMPHERE(jump);
13735   }
13736 else if (mode == PCRE2_JIT_PARTIAL_HARD)
13737   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
13738 
13739 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
13740 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13741   {
13742   sljit_free_compiler(compiler);
13743   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13744   SLJIT_FREE(common->private_data_ptrs, allocator_data);
13745   PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);
13746   return PCRE2_ERROR_NOMEMORY;
13747   }
13748 
13749 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
13750   end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
13751 
13752 if (common->might_be_empty)
13753   {
13754   empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
13755   empty_match_found_label = LABEL();
13756   }
13757 
13758 common->accept_label = LABEL();
13759 if (common->accept != NULL)
13760   set_jumps(common->accept, common->accept_label);
13761 
13762 /* This means we have a match. Update the ovector. */
13763 copy_ovector(common, re->top_bracket + 1);
13764 common->quit_label = common->abort_label = LABEL();
13765 if (common->quit != NULL)
13766   set_jumps(common->quit, common->quit_label);
13767 if (common->abort != NULL)
13768   set_jumps(common->abort, common->abort_label);
13769 if (minlength_check_failed != NULL)
13770   SET_LABEL(minlength_check_failed, common->abort_label);
13771 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
13772 
13773 if (common->failed_match != NULL)
13774   {
13775   SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
13776   set_jumps(common->failed_match, LABEL());
13777   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13778   JUMPTO(SLJIT_JUMP, common->abort_label);
13779   }
13780 
13781 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
13782   JUMPHERE(end_anchor_failed);
13783 
13784 if (mode != PCRE2_JIT_COMPLETE)
13785   {
13786   common->partialmatchlabel = LABEL();
13787   set_jumps(common->partialmatch, common->partialmatchlabel);
13788   return_with_partial_match(common, common->quit_label);
13789   }
13790 
13791 if (common->might_be_empty)
13792   empty_match_backtrack_label = LABEL();
13793 compile_backtrackingpath(common, rootbacktrack.top);
13794 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13795   {
13796   sljit_free_compiler(compiler);
13797   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13798   SLJIT_FREE(common->private_data_ptrs, allocator_data);
13799   PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);
13800   return PCRE2_ERROR_NOMEMORY;
13801   }
13802 
13803 SLJIT_ASSERT(rootbacktrack.prev == NULL);
13804 reset_match_label = LABEL();
13805 
13806 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13807   {
13808   /* Update hit_start only in the first time. */
13809   jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
13810   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
13811   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
13812   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
13813   JUMPHERE(jump);
13814   }
13815 
13816 /* Check we have remaining characters. */
13817 if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
13818   {
13819   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
13820   }
13821 
13822 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
13823     (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1)) : common->start_ptr);
13824 
13825 if ((re->overall_options & PCRE2_ANCHORED) == 0)
13826   {
13827   if (common->ff_newline_shortcut != NULL)
13828     {
13829     /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
13830     if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
13831       {
13832       if (common->match_end_ptr != 0)
13833         {
13834         OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
13835         OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
13836         CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
13837         OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
13838         }
13839       else
13840         CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
13841       }
13842     }
13843   else
13844     CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
13845   }
13846 
13847 /* No more remaining characters. */
13848 if (reqbyte_notfound != NULL)
13849   JUMPHERE(reqbyte_notfound);
13850 
13851 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13852   CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
13853 
13854 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13855 JUMPTO(SLJIT_JUMP, common->quit_label);
13856 
13857 flush_stubs(common);
13858 
13859 if (common->might_be_empty)
13860   {
13861   JUMPHERE(empty_match);
13862   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
13863   OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
13864   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
13865   JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
13866   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
13867   JUMPTO(SLJIT_ZERO, empty_match_found_label);
13868   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13869   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
13870   JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
13871   }
13872 
13873 common->fast_forward_bc_ptr = NULL;
13874 common->fast_fail_start_ptr = 0;
13875 common->fast_fail_end_ptr = 0;
13876 common->currententry = common->entries;
13877 common->local_quit_available = TRUE;
13878 quit_label = common->quit_label;
13879 while (common->currententry != NULL)
13880   {
13881   /* Might add new entries. */
13882   compile_recurse(common);
13883   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13884     {
13885     sljit_free_compiler(compiler);
13886     SLJIT_FREE(common->optimized_cbracket, allocator_data);
13887     SLJIT_FREE(common->private_data_ptrs, allocator_data);
13888     PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);
13889     return PCRE2_ERROR_NOMEMORY;
13890     }
13891   flush_stubs(common);
13892   common->currententry = common->currententry->next;
13893   }
13894 common->local_quit_available = FALSE;
13895 common->quit_label = quit_label;
13896 
13897 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
13898 /* This is a (really) rare case. */
13899 set_jumps(common->stackalloc, LABEL());
13900 /* RETURN_ADDR is not a saved register. */
13901 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
13902 
13903 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13904 
13905 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
13906 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
13907 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
13908 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
13909 OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
13910 
13911 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
13912 
13913 jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
13914 OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
13915 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
13916 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
13917 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
13918 sljit_emit_fast_return(compiler, TMP1, 0);
13919 
13920 /* Allocation failed. */
13921 JUMPHERE(jump);
13922 /* We break the return address cache here, but this is a really rare case. */
13923 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
13924 JUMPTO(SLJIT_JUMP, common->quit_label);
13925 
13926 /* Call limit reached. */
13927 set_jumps(common->calllimit, LABEL());
13928 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
13929 JUMPTO(SLJIT_JUMP, common->quit_label);
13930 
13931 if (common->revertframes != NULL)
13932   {
13933   set_jumps(common->revertframes, LABEL());
13934   do_revertframes(common);
13935   }
13936 if (common->wordboundary != NULL)
13937   {
13938   set_jumps(common->wordboundary, LABEL());
13939   check_wordboundary(common);
13940   }
13941 if (common->anynewline != NULL)
13942   {
13943   set_jumps(common->anynewline, LABEL());
13944   check_anynewline(common);
13945   }
13946 if (common->hspace != NULL)
13947   {
13948   set_jumps(common->hspace, LABEL());
13949   check_hspace(common);
13950   }
13951 if (common->vspace != NULL)
13952   {
13953   set_jumps(common->vspace, LABEL());
13954   check_vspace(common);
13955   }
13956 if (common->casefulcmp != NULL)
13957   {
13958   set_jumps(common->casefulcmp, LABEL());
13959   do_casefulcmp(common);
13960   }
13961 if (common->caselesscmp != NULL)
13962   {
13963   set_jumps(common->caselesscmp, LABEL());
13964   do_caselesscmp(common);
13965   }
13966 if (common->reset_match != NULL)
13967   {
13968   set_jumps(common->reset_match, LABEL());
13969   do_reset_match(common, (re->top_bracket + 1) * 2);
13970   CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
13971   OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
13972   JUMPTO(SLJIT_JUMP, reset_match_label);
13973   }
13974 #ifdef SUPPORT_UNICODE
13975 #if PCRE2_CODE_UNIT_WIDTH == 8
13976 if (common->utfreadchar != NULL)
13977   {
13978   set_jumps(common->utfreadchar, LABEL());
13979   do_utfreadchar(common);
13980   }
13981 if (common->utfreadtype8 != NULL)
13982   {
13983   set_jumps(common->utfreadtype8, LABEL());
13984   do_utfreadtype8(common);
13985   }
13986 if (common->utfpeakcharback != NULL)
13987   {
13988   set_jumps(common->utfpeakcharback, LABEL());
13989   do_utfpeakcharback(common);
13990   }
13991 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
13992 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
13993 if (common->utfreadchar_invalid != NULL)
13994   {
13995   set_jumps(common->utfreadchar_invalid, LABEL());
13996   do_utfreadchar_invalid(common);
13997   }
13998 if (common->utfreadnewline_invalid != NULL)
13999   {
14000   set_jumps(common->utfreadnewline_invalid, LABEL());
14001   do_utfreadnewline_invalid(common);
14002   }
14003 if (common->utfmoveback_invalid)
14004   {
14005   set_jumps(common->utfmoveback_invalid, LABEL());
14006   do_utfmoveback_invalid(common);
14007   }
14008 if (common->utfpeakcharback_invalid)
14009   {
14010   set_jumps(common->utfpeakcharback_invalid, LABEL());
14011   do_utfpeakcharback_invalid(common);
14012   }
14013 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
14014 if (common->getucd != NULL)
14015   {
14016   set_jumps(common->getucd, LABEL());
14017   do_getucd(common);
14018   }
14019 if (common->getucdtype != NULL)
14020   {
14021   set_jumps(common->getucdtype, LABEL());
14022   do_getucdtype(common);
14023   }
14024 #endif /* SUPPORT_UNICODE */
14025 
14026 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14027 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14028 
14029 executable_func = sljit_generate_code(compiler);
14030 executable_size = sljit_get_generated_code_size(compiler);
14031 label_addr = common->label_addrs;
14032 while (label_addr != NULL)
14033   {
14034   *label_addr->update_addr = sljit_get_label_addr(label_addr->label);
14035   label_addr = label_addr->next;
14036   }
14037 sljit_free_compiler(compiler);
14038 if (executable_func == NULL)
14039   {
14040   PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);
14041   return PCRE2_ERROR_NOMEMORY;
14042   }
14043 
14044 /* Reuse the function descriptor if possible. */
14045 if (re->executable_jit != NULL)
14046   functions = (executable_functions *)re->executable_jit;
14047 else
14048   {
14049   functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
14050   if (functions == NULL)
14051     {
14052     /* This case is highly unlikely since we just recently
14053     freed a lot of memory. Not impossible though. */
14054     sljit_free_code(executable_func);
14055     PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);
14056     return PCRE2_ERROR_NOMEMORY;
14057     }
14058   memset(functions, 0, sizeof(executable_functions));
14059   functions->top_bracket = re->top_bracket + 1;
14060   functions->limit_match = re->limit_match;
14061   re->executable_jit = functions;
14062   }
14063 
14064 /* Turn mode into an index. */
14065 if (mode == PCRE2_JIT_COMPLETE)
14066   mode = 0;
14067 else
14068   mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
14069 
14070 SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
14071 functions->executable_funcs[mode] = executable_func;
14072 functions->read_only_data_heads[mode] = common->read_only_data_head;
14073 functions->executable_sizes[mode] = executable_size;
14074 return 0;
14075 }
14076 
14077 #endif
14078 
/*************************************************
*        JIT compile a Regular Expression        *
*************************************************/

/* This function uses JIT to convert a previously-compiled pattern into machine
code.

Arguments:
  code          a compiled pattern
  options       JIT option bits

Returns:        0: success or (*NOJIT) was used
               <0: an error code
*/
14093 
14094 #define PUBLIC_JIT_COMPILE_OPTIONS \
14095   (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF)
14096 
14097 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_compile(pcre2_code * code,uint32_t options)14098 pcre2_jit_compile(pcre2_code *code, uint32_t options)
14099 {
14100 #ifndef SUPPORT_JIT
14101 
14102 (void)code;
14103 (void)options;
14104 return PCRE2_ERROR_JIT_BADOPTION;
14105 
14106 #else  /* SUPPORT_JIT */
14107 
14108 pcre2_real_code *re = (pcre2_real_code *)code;
14109 executable_functions *functions;
14110 uint32_t excluded_options;
14111 int result;
14112 
14113 if (code == NULL)
14114   return PCRE2_ERROR_NULL;
14115 
14116 if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14117   return PCRE2_ERROR_JIT_BADOPTION;
14118 
14119 if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14120 
14121 functions = (executable_functions *)re->executable_jit;
14122 
14123 if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14124     || functions->executable_funcs[0] == NULL)) {
14125   excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14126   result = jit_compile(code, options & ~excluded_options);
14127   if (result != 0)
14128     return result;
14129   }
14130 
14131 if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14132     || functions->executable_funcs[1] == NULL)) {
14133   excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14134   result = jit_compile(code, options & ~excluded_options);
14135   if (result != 0)
14136     return result;
14137   }
14138 
14139 if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14140     || functions->executable_funcs[2] == NULL)) {
14141   excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14142   result = jit_compile(code, options & ~excluded_options);
14143   if (result != 0)
14144     return result;
14145   }
14146 
14147 return 0;
14148 
14149 #endif  /* SUPPORT_JIT */
14150 }
14151 
14152 /* JIT compiler uses an all-in-one approach. This improves security,
14153    since the code generator functions are not exported. */
14154 
14155 #define INCLUDED_FROM_PCRE2_JIT_COMPILE
14156 
14157 #include "pcre2_jit_match.c"
14158 #include "pcre2_jit_misc.c"
14159 
14160 /* End of pcre2_jit_compile.c */
14161