1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Original API code Copyright (c) 1997-2012 University of Cambridge
10 New API code Copyright (c) 2016 University of Cambridge
11
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40
41 #ifdef HAVE_CONFIG_H
42 #include "config.h"
43 #endif
44
45 #include "pcre2_internal.h"
46
47 #ifdef SUPPORT_JIT
48
49 /* All-in-one: Since we use the JIT compiler only from here,
50 we just include it. This way we don't need to touch the build
51 system files. */
52
/* Build-time settings for the sljit sources that are included below. */
#define SLJIT_CONFIG_AUTO 1
#define SLJIT_CONFIG_STATIC 1
#define SLJIT_VERBOSE 0

/* sljit debugging follows PCRE2_DEBUG. */
#if defined(PCRE2_DEBUG)
#define SLJIT_DEBUG 1
#else
#define SLJIT_DEBUG 0
#endif

/* Route sljit allocations through the two wrappers defined right below. */
#define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
#define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
65
pcre2_jit_malloc(size_t size,void * allocator_data)66 static void * pcre2_jit_malloc(size_t size, void *allocator_data)
67 {
68 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
69 return allocator->malloc(size, allocator->memory_data);
70 }
71
pcre2_jit_free(void * ptr,void * allocator_data)72 static void pcre2_jit_free(void *ptr, void *allocator_data)
73 {
74 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
75 allocator->free(ptr, allocator->memory_data);
76 }
77
78 #include "sljit/sljitLir.c"
79
80 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
81 #error Unsupported architecture
82 #endif
83
84 /* Defines for debugging purposes. */
85
86 /* 1 - Use unoptimized capturing brackets.
87 2 - Enable capture_last_ptr (includes option 1). */
88 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
89
90 /* 1 - Always have a control head. */
91 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
92
/* Allocate memory for the regex stack on the real machine stack.
Fast, but limited size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE 8192

/* Enable to check that the allocation could destroy temporaries.
Only active when sljit debugging is switched on. */
#if defined(SLJIT_DEBUG) && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
105
/*
Short summary about the backtracking mechanism employed by the jit code generator:

The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).

  'ab' - 'a' and 'b' regexps are concatenated
  'a+' - 'a' is the sub-expression of the '+' operator

The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.

 Greedy star operator (*) :
   Matching path: match happens.
   Backtrack path: match failed.
 Non-greedy star operator (*?) :
   Matching path: no need to perform a match.
   Backtrack path: match is required.

The following example shows how the code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:

   A(B|C)D

The generated code will be the following:

 A matching path
 '(' matching path (pushing arguments to the stack)
 B matching path
 ')' matching path (pushing arguments to the stack)
 D matching path
 return with successful match

 D backtrack path
 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
 B backtrack path
 C expected path
 jump to D matching path
 C backtrack path
 A backtrack path

 Notice that the order of the backtrack code paths is the opposite of the fast
 code paths. In this way the topmost value on the stack always belongs
 to the current backtrack code path. The backtrack path must check
 whether there is a next alternative. If so, it needs to jump back to
 the matching path eventually. Otherwise it needs to clear out its own stack
 frame and continue the execution on the backtrack code paths.
*/
161
/*
Saved stack frames:

Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.

The stack frames are stored in a chain list, and have the following format:
([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]

Thus we can restore the private data to a particular point in the stack.
*/
175
176 typedef struct jit_arguments {
177 /* Pointers first. */
178 struct sljit_stack *stack;
179 PCRE2_SPTR str;
180 PCRE2_SPTR begin;
181 PCRE2_SPTR end;
182 pcre2_match_data *match_data;
183 PCRE2_SPTR startchar_ptr;
184 PCRE2_UCHAR *mark_ptr;
185 int (*callout)(pcre2_callout_block *, void *);
186 void *callout_data;
187 /* Everything else after. */
188 sljit_uw offset_limit;
189 sljit_u32 limit_match;
190 sljit_u32 oveccount;
191 sljit_u32 options;
192 } jit_arguments;
193
194 #define JIT_NUMBER_OF_COMPILE_MODES 3
195
196 typedef struct executable_functions {
197 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
198 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
199 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
200 sljit_u32 top_bracket;
201 sljit_u32 limit_match;
202 } executable_functions;
203
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
208
/* Node of a singly linked list of stubs; each entry pairs a jump (start)
with a label (quit). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
214
215 typedef struct label_addr_list {
216 struct sljit_label *label;
217 sljit_uw *update_addr;
218 struct label_addr_list *next;
219 } label_addr_list;
220
/* Negative marker values used where a frame size would otherwise be stored. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
225
/* Type tags for control entries. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
230
231 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
232
233 /* The following structure is the key data type for the recursive
234 code generator. It is allocated by compile_matchingpath, and contains
235 the arguments for compile_backtrackingpath. Must be the first member
236 of its descendants. */
237 typedef struct backtrack_common {
238 /* Concatenation stack. */
239 struct backtrack_common *prev;
240 jump_list *nextbacktracks;
241 /* Internal stack (for component operators). */
242 struct backtrack_common *top;
243 jump_list *topbacktracks;
244 /* Opcode pointer. */
245 PCRE2_SPTR cc;
246 } backtrack_common;
247
248 typedef struct assert_backtrack {
249 backtrack_common common;
250 jump_list *condfailed;
251 /* Less than 0 if a frame is not needed. */
252 int framesize;
253 /* Points to our private memory word on the stack. */
254 int private_data_ptr;
255 /* For iterators. */
256 struct sljit_label *matchingpath;
257 } assert_backtrack;
258
259 typedef struct bracket_backtrack {
260 backtrack_common common;
261 /* Where to coninue if an alternative is successfully matched. */
262 struct sljit_label *alternative_matchingpath;
263 /* For rmin and rmax iterators. */
264 struct sljit_label *recursive_matchingpath;
265 /* For greedy ? operator. */
266 struct sljit_label *zero_matchingpath;
267 /* Contains the branches of a failed condition. */
268 union {
269 /* Both for OP_COND, OP_SCOND. */
270 jump_list *condfailed;
271 assert_backtrack *assert;
272 /* For OP_ONCE. Less than 0 if not needed. */
273 int framesize;
274 } u;
275 /* Points to our private memory word on the stack. */
276 int private_data_ptr;
277 } bracket_backtrack;
278
279 typedef struct bracketpos_backtrack {
280 backtrack_common common;
281 /* Points to our private memory word on the stack. */
282 int private_data_ptr;
283 /* Reverting stack is needed. */
284 int framesize;
285 /* Allocated stack size. */
286 int stacksize;
287 } bracketpos_backtrack;
288
289 typedef struct braminzero_backtrack {
290 backtrack_common common;
291 struct sljit_label *matchingpath;
292 } braminzero_backtrack;
293
294 typedef struct char_iterator_backtrack {
295 backtrack_common common;
296 /* Next iteration. */
297 struct sljit_label *matchingpath;
298 union {
299 jump_list *backtracks;
300 struct {
301 unsigned int othercasebit;
302 PCRE2_UCHAR chr;
303 BOOL enabled;
304 } charpos;
305 } u;
306 } char_iterator_backtrack;
307
308 typedef struct ref_iterator_backtrack {
309 backtrack_common common;
310 /* Next iteration. */
311 struct sljit_label *matchingpath;
312 } ref_iterator_backtrack;
313
314 typedef struct recurse_entry {
315 struct recurse_entry *next;
316 /* Contains the function entry. */
317 struct sljit_label *entry;
318 /* Collects the calls until the function is not created. */
319 jump_list *calls;
320 /* Points to the starting opcode. */
321 sljit_sw start;
322 } recurse_entry;
323
324 typedef struct recurse_backtrack {
325 backtrack_common common;
326 BOOL inlined_pattern;
327 } recurse_backtrack;
328
329 #define OP_THEN_TRAP OP_TABLE_LENGTH
330
331 typedef struct then_trap_backtrack {
332 backtrack_common common;
333 /* If then_trap is not NULL, this structure contains the real
334 then_trap for the backtracking path. */
335 struct then_trap_backtrack *then_trap;
336 /* Points to the starting opcode. */
337 sljit_sw start;
338 /* Exit point for the then opcodes of this alternative. */
339 jump_list *quit;
340 /* Frame size of the current alternative. */
341 int framesize;
342 } then_trap_backtrack;
343
344 #define MAX_RANGE_SIZE 4
345
346 typedef struct compiler_common {
347 /* The sljit ceneric compiler. */
348 struct sljit_compiler *compiler;
349 /* First byte code. */
350 PCRE2_SPTR start;
351 /* Maps private data offset to each opcode. */
352 sljit_s32 *private_data_ptrs;
353 /* Chain list of read-only data ptrs. */
354 void *read_only_data_head;
355 /* Tells whether the capturing bracket is optimized. */
356 sljit_u8 *optimized_cbracket;
357 /* Tells whether the starting offset is a target of then. */
358 sljit_u8 *then_offsets;
359 /* Current position where a THEN must jump. */
360 then_trap_backtrack *then_trap;
361 /* Starting offset of private data for capturing brackets. */
362 sljit_s32 cbra_ptr;
363 /* Output vector starting point. Must be divisible by 2. */
364 sljit_s32 ovector_start;
365 /* Points to the starting character of the current match. */
366 sljit_s32 start_ptr;
367 /* Last known position of the requested byte. */
368 sljit_s32 req_char_ptr;
369 /* Head of the last recursion. */
370 sljit_s32 recursive_head_ptr;
371 /* First inspected character for partial matching.
372 (Needed for avoiding zero length partial matches.) */
373 sljit_s32 start_used_ptr;
374 /* Starting pointer for partial soft matches. */
375 sljit_s32 hit_start;
376 /* Pointer of the match end position. */
377 sljit_s32 match_end_ptr;
378 /* Points to the marked string. */
379 sljit_s32 mark_ptr;
380 /* Recursive control verb management chain. */
381 sljit_s32 control_head_ptr;
382 /* Points to the last matched capture block index. */
383 sljit_s32 capture_last_ptr;
384 /* Fast forward skipping byte code pointer. */
385 PCRE2_SPTR fast_forward_bc_ptr;
386 /* Locals used by fast fail optimization. */
387 sljit_s32 fast_fail_start_ptr;
388 sljit_s32 fast_fail_end_ptr;
389
390 /* Flipped and lower case tables. */
391 const sljit_u8 *fcc;
392 sljit_sw lcc;
393 /* Mode can be PCRE2_JIT_COMPLETE and others. */
394 int mode;
395 /* TRUE, when minlength is greater than 0. */
396 BOOL might_be_empty;
397 /* \K is found in the pattern. */
398 BOOL has_set_som;
399 /* (*SKIP:arg) is found in the pattern. */
400 BOOL has_skip_arg;
401 /* (*THEN) is found in the pattern. */
402 BOOL has_then;
403 /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
404 BOOL has_skip_in_assert_back;
405 /* Currently in recurse or negative assert. */
406 BOOL local_exit;
407 /* Currently in a positive assert. */
408 BOOL positive_assert;
409 /* Newline control. */
410 int nltype;
411 sljit_u32 nlmax;
412 sljit_u32 nlmin;
413 int newline;
414 int bsr_nltype;
415 sljit_u32 bsr_nlmax;
416 sljit_u32 bsr_nlmin;
417 /* Dollar endonly. */
418 int endonly;
419 /* Tables. */
420 sljit_sw ctypes;
421 /* Named capturing brackets. */
422 PCRE2_SPTR name_table;
423 sljit_sw name_count;
424 sljit_sw name_entry_size;
425
426 /* Labels and jump lists. */
427 struct sljit_label *partialmatchlabel;
428 struct sljit_label *quit_label;
429 struct sljit_label *forced_quit_label;
430 struct sljit_label *accept_label;
431 struct sljit_label *ff_newline_shortcut;
432 stub_list *stubs;
433 label_addr_list *label_addrs;
434 recurse_entry *entries;
435 recurse_entry *currententry;
436 jump_list *partialmatch;
437 jump_list *quit;
438 jump_list *positive_assert_quit;
439 jump_list *forced_quit;
440 jump_list *accept;
441 jump_list *calllimit;
442 jump_list *stackalloc;
443 jump_list *revertframes;
444 jump_list *wordboundary;
445 jump_list *anynewline;
446 jump_list *hspace;
447 jump_list *vspace;
448 jump_list *casefulcmp;
449 jump_list *caselesscmp;
450 jump_list *reset_match;
451 BOOL unset_backref;
452 BOOL alt_circumflex;
453 #ifdef SUPPORT_UNICODE
454 BOOL utf;
455 BOOL use_ucp;
456 jump_list *getucd;
457 #if PCRE2_CODE_UNIT_WIDTH == 8
458 jump_list *utfreadchar;
459 jump_list *utfreadchar16;
460 jump_list *utfreadtype8;
461 #endif
462 #endif /* SUPPORT_UNICODE */
463 } compiler_common;
464
/* Working state for byte_sequence_compare. The character buffers exist
only when SLJIT_UNALIGNED is set; each one is a union that overlays an
int, a 16 bit value and as many code units as fit into 32 bits. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
498
499 /* Undefine sljit macros. */
500 #undef CMP
501
502 /* Used for accessing the elements of the stack. */
503 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
504
505 #define TMP1 SLJIT_R0
506 #define TMP2 SLJIT_R2
507 #define TMP3 SLJIT_R3
508 #define STR_PTR SLJIT_S0
509 #define STR_END SLJIT_S1
510 #define STACK_TOP SLJIT_R1
511 #define STACK_LIMIT SLJIT_S2
512 #define COUNT_MATCH SLJIT_S3
513 #define ARGUMENTS SLJIT_S4
514 #define RETURN_ADDR SLJIT_R4
515
516 /* Local space layout. */
517 /* These two locals can be used by the current opcode. */
518 #define LOCALS0 (0 * sizeof(sljit_sw))
519 #define LOCALS1 (1 * sizeof(sljit_sw))
520 /* Two local variables for possessive quantifiers (char1 cannot use them). */
521 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
522 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
523 /* Max limit of recursions. */
524 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
525 /* The output vector is stored on the stack, and contains pointers
526 to characters. The vector data is divided into two groups: the first
527 group contains the start / end character pointers, and the second is
528 the start pointers when the end of the capturing group has not yet reached. */
529 #define OVECTOR_START (common->ovector_start)
530 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
531 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
532 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
533
534 #if PCRE2_CODE_UNIT_WIDTH == 8
535 #define MOV_UCHAR SLJIT_MOV_U8
536 #define MOVU_UCHAR SLJIT_MOVU_U8
537 #define IN_UCHARS(x) (x)
538 #elif PCRE2_CODE_UNIT_WIDTH == 16
539 #define MOV_UCHAR SLJIT_MOV_U16
540 #define MOVU_UCHAR SLJIT_MOVU_U16
541 #define UCHAR_SHIFT (1)
542 #define IN_UCHARS(x) ((x) * 2)
543 #elif PCRE2_CODE_UNIT_WIDTH == 32
544 #define MOV_UCHAR SLJIT_MOV_U32
545 #define MOVU_UCHAR SLJIT_MOVU_U32
546 #define UCHAR_SHIFT (2)
547 #define IN_UCHARS(x) ((x) * 4)
548 #else
549 #error Unsupported compiling mode
550 #endif
551
552 /* Shortcuts. */
553 #define DEFINE_COMPILER \
554 struct sljit_compiler *compiler = common->compiler
555 #define OP1(op, dst, dstw, src, srcw) \
556 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
557 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
558 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
559 #define LABEL() \
560 sljit_emit_label(compiler)
561 #define JUMP(type) \
562 sljit_emit_jump(compiler, (type))
563 #define JUMPTO(type, label) \
564 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
565 #define JUMPHERE(jump) \
566 sljit_set_label((jump), sljit_emit_label(compiler))
567 #define SET_LABEL(jump, label) \
568 sljit_set_label((jump), (label))
569 #define CMP(type, src1, src1w, src2, src2w) \
570 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
571 #define CMPTO(type, src1, src1w, src2, src2w, label) \
572 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
573 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
574 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
575 #define GET_LOCAL_BASE(dst, dstw, offset) \
576 sljit_get_local_base(compiler, (dst), (dstw), (offset))
577
578 #define READ_CHAR_MAX 0x7fffffff
579
/* Finds the end of a bracketed group.

Argument:
  cc   points to the opening opcode (an assertion, or anything from
       OP_ONCE to OP_SCOND)

Returns a pointer to the first code unit after the closing KET opcode
and its link field. */
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

  /* The first link is always followed, the next ones only while they
  lead to another alternative. */
  cc += GET(cc, 1);
  while (*cc == OP_ALT)
    cc += GET(cc, 1);

  SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
  return cc + 1 + LINK_SIZE;
}
588
/* Counts the alternatives of a bracketed group.

Argument:
  cc   points to the opening opcode (an assertion, or anything from
       OP_ONCE to OP_SCOND)

Returns the number of links followed until the closing KET opcode is
reached, which is at least 1. */
static int no_alternatives(PCRE2_SPTR cc)
{
  int alternatives = 1;

  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

  /* The initial value of the counter covers the first branch; each
  OP_ALT found afterwards adds one more. */
  for (cc += GET(cc, 1); *cc == OP_ALT; cc += GET(cc, 1))
    alternatives++;

  SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
  return alternatives;
}
602
/* Functions which might need modification whenever a new opcode is supported:
     next_opcode
     check_opcode_types
     set_private_data_ptrs
     get_framesize
     init_frame
     get_private_data_copy_length
     copy_private_data
     compile_matchingpath
     compile_backtrackingpath
*/
614
/* Steps over one opcode of the compiled pattern.

Arguments:
  common   compiler state; only its utf flag is read, and only in builds
           with SUPPORT_UNICODE
  cc       points to the opcode to skip

Returns a pointer to the following opcode, or NULL when the opcode is
not accepted (OP_ANYBYTE in UTF mode, or an unknown opcode). */
static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
  PCRE2_SPTR next;

  SLJIT_UNUSED_ARG(common);

  switch (*cc)
    {
    /* Opcodes whose length is fully described by the OP_lengths table. */
    case OP_SOD: case OP_SOM: case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
    case OP_NOT_DIGIT: case OP_DIGIT:
    case OP_NOT_WHITESPACE: case OP_WHITESPACE:
    case OP_NOT_WORDCHAR: case OP_WORDCHAR:
    case OP_ANY: case OP_ALLANY:
    case OP_NOTPROP: case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE: case OP_HSPACE:
    case OP_NOT_VSPACE: case OP_VSPACE:
    case OP_EXTUNI:
    case OP_EODN: case OP_EOD:
    case OP_CIRC: case OP_CIRCM:
    case OP_DOLL: case OP_DOLLM:
    case OP_CRSTAR: case OP_CRMINSTAR:
    case OP_CRPLUS: case OP_CRMINPLUS:
    case OP_CRQUERY: case OP_CRMINQUERY:
    case OP_CRRANGE: case OP_CRMINRANGE:
    case OP_CRPOSSTAR: case OP_CRPOSPLUS:
    case OP_CRPOSQUERY: case OP_CRPOSRANGE:
    case OP_CLASS: case OP_NCLASS:
    case OP_REF: case OP_REFI:
    case OP_DNREF: case OP_DNREFI:
    case OP_RECURSE:
    case OP_CALLOUT:
    case OP_ALT:
    case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS:
    case OP_REVERSE:
    case OP_ASSERT: case OP_ASSERT_NOT:
    case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
    case OP_ONCE: case OP_ONCE_NC:
    case OP_BRA: case OP_BRAPOS:
    case OP_CBRA: case OP_CBRAPOS:
    case OP_COND:
    case OP_SBRA: case OP_SBRAPOS:
    case OP_SCBRA: case OP_SCBRAPOS:
    case OP_SCOND:
    case OP_CREF: case OP_DNCREF:
    case OP_RREF: case OP_DNRREF:
    case OP_FALSE: case OP_TRUE:
    case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
    case OP_PRUNE: case OP_SKIP: case OP_THEN: case OP_COMMIT:
    case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT:
    case OP_CLOSE:
    case OP_SKIPZERO:
      return cc + PRIV(OP_lengths)[*cc];

    /* Opcodes that end with a character: in UTF mode the character may
    take extra code units, which are not part of the table length. */
    case OP_CHAR: case OP_CHARI:
    case OP_NOT: case OP_NOTI:
    case OP_STAR: case OP_MINSTAR:
    case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY:
    case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS:
    case OP_POSQUERY: case OP_POSUPTO:
    case OP_STARI: case OP_MINSTARI:
    case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI:
    case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI:
    case OP_POSQUERYI: case OP_POSUPTOI:
    case OP_NOTSTAR: case OP_NOTMINSTAR:
    case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY:
    case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
    case OP_NOTSTARI: case OP_NOTMINSTARI:
    case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI:
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
      next = cc + PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(next[-1]))
        next += GET_EXTRALEN(next[-1]);
#endif
      return next;

    /* Special cases. The returned pointer is one code unit short of the
    table length for these type repeats. */
    case OP_TYPESTAR: case OP_TYPEMINSTAR:
    case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
      return cc + PRIV(OP_lengths)[*cc] - 1;

    case OP_ANYBYTE:
#ifdef SUPPORT_UNICODE
      /* Rejected in UTF mode. */
      if (common->utf)
        return NULL;
#endif
      return cc + 1;

    /* The total length is stored inside the opcode itself. */
    case OP_CALLOUT_STR:
      return cc + GET(cc, 1 + 2*LINK_SIZE);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
      return cc + GET(cc, 1);
#endif

    /* Verbs with an argument: cc[1] is added on top of three fixed
    code units. */
    case OP_MARK:
    case OP_PRUNE_ARG:
    case OP_SKIP_ARG:
    case OP_THEN_ARG:
      return cc + 1 + 2 + cc[1];

    default:
      /* All opcodes are supported now! */
      SLJIT_ASSERT_STOP();
      return NULL;
    }
}
813
check_opcode_types(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend)814 static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
815 {
816 int count;
817 PCRE2_SPTR slot;
818 PCRE2_SPTR assert_back_end = cc - 1;
819
820 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
821 while (cc < ccend)
822 {
823 switch(*cc)
824 {
825 case OP_SET_SOM:
826 common->has_set_som = TRUE;
827 common->might_be_empty = TRUE;
828 cc += 1;
829 break;
830
831 case OP_REF:
832 case OP_REFI:
833 common->optimized_cbracket[GET2(cc, 1)] = 0;
834 cc += 1 + IMM2_SIZE;
835 break;
836
837 case OP_CBRAPOS:
838 case OP_SCBRAPOS:
839 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
840 cc += 1 + LINK_SIZE + IMM2_SIZE;
841 break;
842
843 case OP_COND:
844 case OP_SCOND:
845 /* Only AUTO_CALLOUT can insert this opcode. We do
846 not intend to support this case. */
847 if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
848 return FALSE;
849 cc += 1 + LINK_SIZE;
850 break;
851
852 case OP_CREF:
853 common->optimized_cbracket[GET2(cc, 1)] = 0;
854 cc += 1 + IMM2_SIZE;
855 break;
856
857 case OP_DNREF:
858 case OP_DNREFI:
859 case OP_DNCREF:
860 count = GET2(cc, 1 + IMM2_SIZE);
861 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
862 while (count-- > 0)
863 {
864 common->optimized_cbracket[GET2(slot, 0)] = 0;
865 slot += common->name_entry_size;
866 }
867 cc += 1 + 2 * IMM2_SIZE;
868 break;
869
870 case OP_RECURSE:
871 /* Set its value only once. */
872 if (common->recursive_head_ptr == 0)
873 {
874 common->recursive_head_ptr = common->ovector_start;
875 common->ovector_start += sizeof(sljit_sw);
876 }
877 cc += 1 + LINK_SIZE;
878 break;
879
880 case OP_CALLOUT:
881 case OP_CALLOUT_STR:
882 if (common->capture_last_ptr == 0)
883 {
884 common->capture_last_ptr = common->ovector_start;
885 common->ovector_start += sizeof(sljit_sw);
886 }
887 cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
888 break;
889
890 case OP_ASSERTBACK:
891 slot = bracketend(cc);
892 if (slot > assert_back_end)
893 assert_back_end = slot;
894 cc += 1 + LINK_SIZE;
895 break;
896
897 case OP_THEN_ARG:
898 common->has_then = TRUE;
899 common->control_head_ptr = 1;
900 /* Fall through. */
901
902 case OP_PRUNE_ARG:
903 case OP_MARK:
904 if (common->mark_ptr == 0)
905 {
906 common->mark_ptr = common->ovector_start;
907 common->ovector_start += sizeof(sljit_sw);
908 }
909 cc += 1 + 2 + cc[1];
910 break;
911
912 case OP_THEN:
913 common->has_then = TRUE;
914 common->control_head_ptr = 1;
915 cc += 1;
916 break;
917
918 case OP_SKIP:
919 if (cc < assert_back_end)
920 common->has_skip_in_assert_back = TRUE;
921 cc += 1;
922 break;
923
924 case OP_SKIP_ARG:
925 common->control_head_ptr = 1;
926 common->has_skip_arg = TRUE;
927 if (cc < assert_back_end)
928 common->has_skip_in_assert_back = TRUE;
929 cc += 1 + 2 + cc[1];
930 break;
931
932 default:
933 cc = next_opcode(common, cc);
934 if (cc == NULL)
935 return FALSE;
936 break;
937 }
938 }
939 return TRUE;
940 }
941
is_accelerated_repeat(PCRE2_SPTR cc)942 static BOOL is_accelerated_repeat(PCRE2_SPTR cc)
943 {
944 switch(*cc)
945 {
946 case OP_TYPESTAR:
947 case OP_TYPEMINSTAR:
948 case OP_TYPEPLUS:
949 case OP_TYPEMINPLUS:
950 case OP_TYPEPOSSTAR:
951 case OP_TYPEPOSPLUS:
952 return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);
953
954 case OP_STAR:
955 case OP_MINSTAR:
956 case OP_PLUS:
957 case OP_MINPLUS:
958 case OP_POSSTAR:
959 case OP_POSPLUS:
960
961 case OP_STARI:
962 case OP_MINSTARI:
963 case OP_PLUSI:
964 case OP_MINPLUSI:
965 case OP_POSSTARI:
966 case OP_POSPLUSI:
967
968 case OP_NOTSTAR:
969 case OP_NOTMINSTAR:
970 case OP_NOTPLUS:
971 case OP_NOTMINPLUS:
972 case OP_NOTPOSSTAR:
973 case OP_NOTPOSPLUS:
974
975 case OP_NOTSTARI:
976 case OP_NOTMINSTARI:
977 case OP_NOTPLUSI:
978 case OP_NOTMINPLUSI:
979 case OP_NOTPOSSTARI:
980 case OP_NOTPOSPLUSI:
981 return TRUE;
982
983 case OP_CLASS:
984 case OP_NCLASS:
985 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
986 case OP_XCLASS:
987 cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(PCRE2_UCHAR)));
988 #else
989 cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
990 #endif
991
992 switch(*cc)
993 {
994 case OP_CRSTAR:
995 case OP_CRMINSTAR:
996 case OP_CRPLUS:
997 case OP_CRMINPLUS:
998 case OP_CRPOSSTAR:
999 case OP_CRPOSPLUS:
1000 return TRUE;
1001 }
1002 break;
1003 }
1004 return FALSE;
1005 }
1006
detect_fast_forward_skip(compiler_common * common,int * private_data_start)1007 static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
1008 {
1009 PCRE2_SPTR cc = common->start;
1010 PCRE2_SPTR end;
1011
1012 /* Skip not repeated brackets. */
1013 while (TRUE)
1014 {
1015 switch(*cc)
1016 {
1017 case OP_SOD:
1018 case OP_SOM:
1019 case OP_SET_SOM:
1020 case OP_NOT_WORD_BOUNDARY:
1021 case OP_WORD_BOUNDARY:
1022 case OP_EODN:
1023 case OP_EOD:
1024 case OP_CIRC:
1025 case OP_CIRCM:
1026 case OP_DOLL:
1027 case OP_DOLLM:
1028 /* Zero width assertions. */
1029 cc++;
1030 continue;
1031 }
1032
1033 if (*cc != OP_BRA && *cc != OP_CBRA)
1034 break;
1035
1036 end = cc + GET(cc, 1);
1037 if (*end != OP_KET || PRIVATE_DATA(end) != 0)
1038 return FALSE;
1039 if (*cc == OP_CBRA)
1040 {
1041 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1042 return FALSE;
1043 cc += IMM2_SIZE;
1044 }
1045 cc += 1 + LINK_SIZE;
1046 }
1047
1048 if (is_accelerated_repeat(cc))
1049 {
1050 common->fast_forward_bc_ptr = cc;
1051 common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1052 *private_data_start += sizeof(sljit_sw);
1053 return TRUE;
1054 }
1055 return FALSE;
1056 }
1057
detect_fast_fail(compiler_common * common,PCRE2_SPTR cc,int * private_data_start,sljit_s32 depth)1058 static SLJIT_INLINE void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth)
1059 {
1060 PCRE2_SPTR next_alt;
1061
1062 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
1063
1064 if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1065 return;
1066
1067 next_alt = bracketend(cc) - (1 + LINK_SIZE);
1068 if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
1069 return;
1070
1071 do
1072 {
1073 next_alt = cc + GET(cc, 1);
1074
1075 cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1076
1077 while (TRUE)
1078 {
1079 switch(*cc)
1080 {
1081 case OP_SOD:
1082 case OP_SOM:
1083 case OP_SET_SOM:
1084 case OP_NOT_WORD_BOUNDARY:
1085 case OP_WORD_BOUNDARY:
1086 case OP_EODN:
1087 case OP_EOD:
1088 case OP_CIRC:
1089 case OP_CIRCM:
1090 case OP_DOLL:
1091 case OP_DOLLM:
1092 /* Zero width assertions. */
1093 cc++;
1094 continue;
1095 }
1096 break;
1097 }
1098
1099 if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
1100 detect_fast_fail(common, cc, private_data_start, depth - 1);
1101
1102 if (is_accelerated_repeat(cc))
1103 {
1104 common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1105
1106 if (common->fast_fail_start_ptr == 0)
1107 common->fast_fail_start_ptr = *private_data_start;
1108
1109 *private_data_start += sizeof(sljit_sw);
1110 common->fast_fail_end_ptr = *private_data_start;
1111
1112 if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1113 return;
1114 }
1115
1116 cc = next_alt;
1117 }
1118 while (*cc == OP_ALT);
1119 }
1120
/* Returns how many private data words (0, 1 or 2) the repeat opcode at cc
needs. cc is expected to point at the code unit that follows a character
class; anything that is not one of the OP_CR* opcodes below yields 0.

  OP_CRSTAR, OP_CRPLUS                      2
  OP_CRMINSTAR, OP_CRMINPLUS,
  OP_CRQUERY, OP_CRMINQUERY                 1
  OP_CRRANGE, OP_CRMINRANGE                 depends on the limits

For the range forms an upper limit of zero is treated like the STAR forms
(2 for OP_CRRANGE, 1 for OP_CRMINRANGE); otherwise the result is the
difference of the two limits capped at 2. */
static int get_class_iterator_size(PCRE2_SPTR cc)
{
const PCRE2_UCHAR op = *cc;
sljit_u32 lower;
sljit_u32 upper;
sljit_u32 span;

if (op == OP_CRSTAR || op == OP_CRPLUS)
  return 2;

if (op == OP_CRMINSTAR || op == OP_CRMINPLUS || op == OP_CRQUERY || op == OP_CRMINQUERY)
  return 1;

if (op != OP_CRRANGE && op != OP_CRMINRANGE)
  return 0;

lower = GET2(cc, 1);
upper = GET2(cc, 1 + IMM2_SIZE);
if (upper == 0)
  return (op == OP_CRRANGE) ? 2 : 1;

/* Unsigned arithmetic on purpose, exactly as the limits are stored. */
span = upper - lower;
return (span > 2) ? 2 : (int)span;
}
1152
detect_repeat(compiler_common * common,PCRE2_SPTR begin)1153 static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
1154 {
1155 PCRE2_SPTR end = bracketend(begin);
1156 PCRE2_SPTR next;
1157 PCRE2_SPTR next_end;
1158 PCRE2_SPTR max_end;
1159 PCRE2_UCHAR type;
1160 sljit_sw length = end - begin;
1161 sljit_s32 min, max, i;
1162
1163 /* Detect fixed iterations first. */
1164 if (end[-(1 + LINK_SIZE)] != OP_KET)
1165 return FALSE;
1166
1167 /* Already detected repeat. */
1168 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
1169 return TRUE;
1170
1171 next = end;
1172 min = 1;
1173 while (1)
1174 {
1175 if (*next != *begin)
1176 break;
1177 next_end = bracketend(next);
1178 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1179 break;
1180 next = next_end;
1181 min++;
1182 }
1183
1184 if (min == 2)
1185 return FALSE;
1186
1187 max = 0;
1188 max_end = next;
1189 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1190 {
1191 type = *next;
1192 while (1)
1193 {
1194 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1195 break;
1196 next_end = bracketend(next + 2 + LINK_SIZE);
1197 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1198 break;
1199 next = next_end;
1200 max++;
1201 }
1202
1203 if (next[0] == type && next[1] == *begin && max >= 1)
1204 {
1205 next_end = bracketend(next + 1);
1206 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1207 {
1208 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1209 if (*next_end != OP_KET)
1210 break;
1211
1212 if (i == max)
1213 {
1214 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1215 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1216 /* +2 the original and the last. */
1217 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1218 if (min == 1)
1219 return TRUE;
1220 min--;
1221 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1222 }
1223 }
1224 }
1225 }
1226
1227 if (min >= 3)
1228 {
1229 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1230 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1231 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1232 return TRUE;
1233 }
1234
1235 return FALSE;
1236 }
1237
/* Case-label lists shared by set_private_data_ptrs,
get_private_data_copy_length and copy_private_data. Each macro expands to a
run of "case OP_...:" labels, so it must be used directly inside a switch
and be followed by the statements for that group. The trailing number is
the count of private data words those functions use for the group. */

/* Single character iterators that use one private data word. The opcode is
followed by the character itself. */
1238 #define CASE_ITERATOR_PRIVATE_DATA_1 \
1239 case OP_MINSTAR: \
1240 case OP_MINPLUS: \
1241 case OP_QUERY: \
1242 case OP_MINQUERY: \
1243 case OP_MINSTARI: \
1244 case OP_MINPLUSI: \
1245 case OP_QUERYI: \
1246 case OP_MINQUERYI: \
1247 case OP_NOTMINSTAR: \
1248 case OP_NOTMINPLUS: \
1249 case OP_NOTQUERY: \
1250 case OP_NOTMINQUERY: \
1251 case OP_NOTMINSTARI: \
1252 case OP_NOTMINPLUSI: \
1253 case OP_NOTQUERYI: \
1254 case OP_NOTMINQUERYI:
1255
/* Single character iterators that use two private data words. */
1256 #define CASE_ITERATOR_PRIVATE_DATA_2A \
1257 case OP_STAR: \
1258 case OP_PLUS: \
1259 case OP_STARI: \
1260 case OP_PLUSI: \
1261 case OP_NOTSTAR: \
1262 case OP_NOTPLUS: \
1263 case OP_NOTSTARI: \
1264 case OP_NOTPLUSI:
1265
/* Single character iterators that use two private data words and carry an
IMM2_SIZE operand before the character. */
1266 #define CASE_ITERATOR_PRIVATE_DATA_2B \
1267 case OP_UPTO: \
1268 case OP_MINUPTO: \
1269 case OP_UPTOI: \
1270 case OP_MINUPTOI: \
1271 case OP_NOTUPTO: \
1272 case OP_NOTMINUPTO: \
1273 case OP_NOTUPTOI: \
1274 case OP_NOTMINUPTOI:
1275
/* Character type iterators that use one private data word. */
1276 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1277 case OP_TYPEMINSTAR: \
1278 case OP_TYPEMINPLUS: \
1279 case OP_TYPEQUERY: \
1280 case OP_TYPEMINQUERY:
1281
/* Character type iterators that use two private data words
(set_private_data_ptrs gives them none when the type is OP_ANYNL or
OP_EXTUNI). */
1282 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1283 case OP_TYPESTAR: \
1284 case OP_TYPEPLUS:
1285
/* Character type iterators with an IMM2_SIZE operand that use two private
data words. set_private_data_ptrs handles these two opcodes individually
instead of through this macro. */
1286 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1287 case OP_TYPEUPTO: \
1288 case OP_TYPEMINUPTO:
1289
/* Walks the compiled pattern from common->start up to ccend and assigns
private data offsets (stored in common->private_data_ptrs, indexed by the
opcode position) to the opcodes that need them.

  common              compiler state; private_data_ptrs is updated.
  private_data_start  in: first free offset; out: first free offset after
                      all assignments.
  ccend               end of the code to process.

Brackets for which detect_repeat() succeeds, and brackets that do not end
with a plain OP_KET, mark a region (tracked in "end") inside which character
iterators do not get a slot. Processing stops early once the next offset
would exceed SLJIT_MAX_LOCAL_SIZE.

For every opcode three values describe what to do:
  space       number of sljit_sw words to reserve for an iterator;
  size        how far to advance; a negative value means a single character
              iterator, where the UTF extra length of the character is
              added on top of -size;
  bracketlen  length of a bracket header to step over. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
PCRE2_SPTR cc = common->start;
PCRE2_SPTR alternative;
PCRE2_SPTR end = NULL;
int private_data_ptr = *private_data_start;
int space, size, bracketlen;
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  space = 0;
  size = 0;
  bracketlen = 0;
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
    {
    if (detect_repeat(common, cc))
      {
      /* These brackets are converted to repeats, so no global
      based single character repeat is allowed. */
      if (cc >= end)
        end = bracketend(cc);
      }
    }
  /* The check is only suppressed for the bracket that directly follows
  an OP_BRAZERO / OP_BRAMINZERO / OP_BRAPOSZERO. */
  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry after the KET was left there by detect_repeat():
    the KET gets a slot and the repeated copies are skipped. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    repeat_check = FALSE;
    size = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    space = 1;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    space = 2;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    space = 2;
    size = -(2 + IMM2_SIZE);
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    space = 1;
    size = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    size = 1;
    break;

    case OP_TYPEUPTO:
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    size = 1 + IMM2_SIZE;
    break;

    case OP_TYPEMINUPTO:
    space = 2;
    size = 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* The repeat opcode follows the class data, so the class length has
    to be known before get_class_iterator_size() is asked about it (the
    same order is used by get_private_data_copy_length and
    copy_private_data). */
    size = 1 + 32 / sizeof(PCRE2_UCHAR);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    /* Same as above; the length of an extended class is stored in it. */
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
  gets a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  if (size != 0)
    {
    if (size < 0)
      {
      cc += -size;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    if (cc >= end)
      {
      /* Only a bracket whose last KET is not a plain OP_KET restricts
      the iterators it contains. */
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
*private_data_start = private_data_ptr;
}
1470
1471 /* Returns with a frame_types (always < 0) if no need for frame. */
get_framesize(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,BOOL recursive,BOOL * needs_control_head)1472 static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
1473 {
1474 int length = 0;
1475 int possessive = 0;
1476 BOOL stack_restore = FALSE;
1477 BOOL setsom_found = recursive;
1478 BOOL setmark_found = recursive;
1479 /* The last capture is a local variable even for recursions. */
1480 BOOL capture_last_found = FALSE;
1481
1482 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1483 SLJIT_ASSERT(common->control_head_ptr != 0);
1484 *needs_control_head = TRUE;
1485 #else
1486 *needs_control_head = FALSE;
1487 #endif
1488
1489 if (ccend == NULL)
1490 {
1491 ccend = bracketend(cc) - (1 + LINK_SIZE);
1492 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1493 {
1494 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1495 /* This is correct regardless of common->capture_last_ptr. */
1496 capture_last_found = TRUE;
1497 }
1498 cc = next_opcode(common, cc);
1499 }
1500
1501 SLJIT_ASSERT(cc != NULL);
1502 while (cc < ccend)
1503 switch(*cc)
1504 {
1505 case OP_SET_SOM:
1506 SLJIT_ASSERT(common->has_set_som);
1507 stack_restore = TRUE;
1508 if (!setsom_found)
1509 {
1510 length += 2;
1511 setsom_found = TRUE;
1512 }
1513 cc += 1;
1514 break;
1515
1516 case OP_MARK:
1517 case OP_PRUNE_ARG:
1518 case OP_THEN_ARG:
1519 SLJIT_ASSERT(common->mark_ptr != 0);
1520 stack_restore = TRUE;
1521 if (!setmark_found)
1522 {
1523 length += 2;
1524 setmark_found = TRUE;
1525 }
1526 if (common->control_head_ptr != 0)
1527 *needs_control_head = TRUE;
1528 cc += 1 + 2 + cc[1];
1529 break;
1530
1531 case OP_RECURSE:
1532 stack_restore = TRUE;
1533 if (common->has_set_som && !setsom_found)
1534 {
1535 length += 2;
1536 setsom_found = TRUE;
1537 }
1538 if (common->mark_ptr != 0 && !setmark_found)
1539 {
1540 length += 2;
1541 setmark_found = TRUE;
1542 }
1543 if (common->capture_last_ptr != 0 && !capture_last_found)
1544 {
1545 length += 2;
1546 capture_last_found = TRUE;
1547 }
1548 cc += 1 + LINK_SIZE;
1549 break;
1550
1551 case OP_CBRA:
1552 case OP_CBRAPOS:
1553 case OP_SCBRA:
1554 case OP_SCBRAPOS:
1555 stack_restore = TRUE;
1556 if (common->capture_last_ptr != 0 && !capture_last_found)
1557 {
1558 length += 2;
1559 capture_last_found = TRUE;
1560 }
1561 length += 3;
1562 cc += 1 + LINK_SIZE + IMM2_SIZE;
1563 break;
1564
1565 case OP_THEN:
1566 stack_restore = TRUE;
1567 if (common->control_head_ptr != 0)
1568 *needs_control_head = TRUE;
1569 cc ++;
1570 break;
1571
1572 default:
1573 stack_restore = TRUE;
1574 /* Fall through. */
1575
1576 case OP_NOT_WORD_BOUNDARY:
1577 case OP_WORD_BOUNDARY:
1578 case OP_NOT_DIGIT:
1579 case OP_DIGIT:
1580 case OP_NOT_WHITESPACE:
1581 case OP_WHITESPACE:
1582 case OP_NOT_WORDCHAR:
1583 case OP_WORDCHAR:
1584 case OP_ANY:
1585 case OP_ALLANY:
1586 case OP_ANYBYTE:
1587 case OP_NOTPROP:
1588 case OP_PROP:
1589 case OP_ANYNL:
1590 case OP_NOT_HSPACE:
1591 case OP_HSPACE:
1592 case OP_NOT_VSPACE:
1593 case OP_VSPACE:
1594 case OP_EXTUNI:
1595 case OP_EODN:
1596 case OP_EOD:
1597 case OP_CIRC:
1598 case OP_CIRCM:
1599 case OP_DOLL:
1600 case OP_DOLLM:
1601 case OP_CHAR:
1602 case OP_CHARI:
1603 case OP_NOT:
1604 case OP_NOTI:
1605
1606 case OP_EXACT:
1607 case OP_POSSTAR:
1608 case OP_POSPLUS:
1609 case OP_POSQUERY:
1610 case OP_POSUPTO:
1611
1612 case OP_EXACTI:
1613 case OP_POSSTARI:
1614 case OP_POSPLUSI:
1615 case OP_POSQUERYI:
1616 case OP_POSUPTOI:
1617
1618 case OP_NOTEXACT:
1619 case OP_NOTPOSSTAR:
1620 case OP_NOTPOSPLUS:
1621 case OP_NOTPOSQUERY:
1622 case OP_NOTPOSUPTO:
1623
1624 case OP_NOTEXACTI:
1625 case OP_NOTPOSSTARI:
1626 case OP_NOTPOSPLUSI:
1627 case OP_NOTPOSQUERYI:
1628 case OP_NOTPOSUPTOI:
1629
1630 case OP_TYPEEXACT:
1631 case OP_TYPEPOSSTAR:
1632 case OP_TYPEPOSPLUS:
1633 case OP_TYPEPOSQUERY:
1634 case OP_TYPEPOSUPTO:
1635
1636 case OP_CLASS:
1637 case OP_NCLASS:
1638 case OP_XCLASS:
1639
1640 case OP_CALLOUT:
1641 case OP_CALLOUT_STR:
1642
1643 cc = next_opcode(common, cc);
1644 SLJIT_ASSERT(cc != NULL);
1645 break;
1646 }
1647
1648 /* Possessive quantifiers can use a special case. */
1649 if (SLJIT_UNLIKELY(possessive == length))
1650 return stack_restore ? no_frame : no_stack;
1651
1652 if (length > 0)
1653 return length + 1;
1654 return stack_restore ? no_frame : no_stack;
1655 }
1656
init_frame(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,int stackpos,int stacktop,BOOL recursive)1657 static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop, BOOL recursive)
1658 {
1659 DEFINE_COMPILER;
1660 BOOL setsom_found = recursive;
1661 BOOL setmark_found = recursive;
1662 /* The last capture is a local variable even for recursions. */
1663 BOOL capture_last_found = FALSE;
1664 int offset;
1665
1666 /* >= 1 + shortest item size (2) */
1667 SLJIT_UNUSED_ARG(stacktop);
1668 SLJIT_ASSERT(stackpos >= stacktop + 2);
1669
1670 stackpos = STACK(stackpos);
1671 if (ccend == NULL)
1672 {
1673 ccend = bracketend(cc) - (1 + LINK_SIZE);
1674 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1675 cc = next_opcode(common, cc);
1676 }
1677
1678 SLJIT_ASSERT(cc != NULL);
1679 while (cc < ccend)
1680 switch(*cc)
1681 {
1682 case OP_SET_SOM:
1683 SLJIT_ASSERT(common->has_set_som);
1684 if (!setsom_found)
1685 {
1686 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1687 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1688 stackpos += (int)sizeof(sljit_sw);
1689 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1690 stackpos += (int)sizeof(sljit_sw);
1691 setsom_found = TRUE;
1692 }
1693 cc += 1;
1694 break;
1695
1696 case OP_MARK:
1697 case OP_PRUNE_ARG:
1698 case OP_THEN_ARG:
1699 SLJIT_ASSERT(common->mark_ptr != 0);
1700 if (!setmark_found)
1701 {
1702 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1703 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1704 stackpos += (int)sizeof(sljit_sw);
1705 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1706 stackpos += (int)sizeof(sljit_sw);
1707 setmark_found = TRUE;
1708 }
1709 cc += 1 + 2 + cc[1];
1710 break;
1711
1712 case OP_RECURSE:
1713 if (common->has_set_som && !setsom_found)
1714 {
1715 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1716 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1717 stackpos += (int)sizeof(sljit_sw);
1718 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1719 stackpos += (int)sizeof(sljit_sw);
1720 setsom_found = TRUE;
1721 }
1722 if (common->mark_ptr != 0 && !setmark_found)
1723 {
1724 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1725 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1726 stackpos += (int)sizeof(sljit_sw);
1727 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1728 stackpos += (int)sizeof(sljit_sw);
1729 setmark_found = TRUE;
1730 }
1731 if (common->capture_last_ptr != 0 && !capture_last_found)
1732 {
1733 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1734 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1735 stackpos += (int)sizeof(sljit_sw);
1736 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1737 stackpos += (int)sizeof(sljit_sw);
1738 capture_last_found = TRUE;
1739 }
1740 cc += 1 + LINK_SIZE;
1741 break;
1742
1743 case OP_CBRA:
1744 case OP_CBRAPOS:
1745 case OP_SCBRA:
1746 case OP_SCBRAPOS:
1747 if (common->capture_last_ptr != 0 && !capture_last_found)
1748 {
1749 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1750 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1751 stackpos += (int)sizeof(sljit_sw);
1752 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1753 stackpos += (int)sizeof(sljit_sw);
1754 capture_last_found = TRUE;
1755 }
1756 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1757 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1758 stackpos += (int)sizeof(sljit_sw);
1759 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1760 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1761 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1762 stackpos += (int)sizeof(sljit_sw);
1763 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1764 stackpos += (int)sizeof(sljit_sw);
1765
1766 cc += 1 + LINK_SIZE + IMM2_SIZE;
1767 break;
1768
1769 default:
1770 cc = next_opcode(common, cc);
1771 SLJIT_ASSERT(cc != NULL);
1772 break;
1773 }
1774
1775 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1776 SLJIT_ASSERT(stackpos == STACK(stacktop));
1777 }
1778
get_private_data_copy_length(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,BOOL needs_control_head)1779 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL needs_control_head)
1780 {
1781 int private_data_length = needs_control_head ? 3 : 2;
1782 int size;
1783 PCRE2_SPTR alternative;
1784 /* Calculate the sum of the private machine words. */
1785 while (cc < ccend)
1786 {
1787 size = 0;
1788 switch(*cc)
1789 {
1790 case OP_KET:
1791 if (PRIVATE_DATA(cc) != 0)
1792 {
1793 private_data_length++;
1794 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1795 cc += PRIVATE_DATA(cc + 1);
1796 }
1797 cc += 1 + LINK_SIZE;
1798 break;
1799
1800 case OP_ASSERT:
1801 case OP_ASSERT_NOT:
1802 case OP_ASSERTBACK:
1803 case OP_ASSERTBACK_NOT:
1804 case OP_ONCE:
1805 case OP_ONCE_NC:
1806 case OP_BRAPOS:
1807 case OP_SBRA:
1808 case OP_SBRAPOS:
1809 case OP_SCOND:
1810 private_data_length++;
1811 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1812 cc += 1 + LINK_SIZE;
1813 break;
1814
1815 case OP_CBRA:
1816 case OP_SCBRA:
1817 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1818 private_data_length++;
1819 cc += 1 + LINK_SIZE + IMM2_SIZE;
1820 break;
1821
1822 case OP_CBRAPOS:
1823 case OP_SCBRAPOS:
1824 private_data_length += 2;
1825 cc += 1 + LINK_SIZE + IMM2_SIZE;
1826 break;
1827
1828 case OP_COND:
1829 /* Might be a hidden SCOND. */
1830 alternative = cc + GET(cc, 1);
1831 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1832 private_data_length++;
1833 cc += 1 + LINK_SIZE;
1834 break;
1835
1836 CASE_ITERATOR_PRIVATE_DATA_1
1837 if (PRIVATE_DATA(cc))
1838 private_data_length++;
1839 cc += 2;
1840 #ifdef SUPPORT_UNICODE
1841 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1842 #endif
1843 break;
1844
1845 CASE_ITERATOR_PRIVATE_DATA_2A
1846 if (PRIVATE_DATA(cc))
1847 private_data_length += 2;
1848 cc += 2;
1849 #ifdef SUPPORT_UNICODE
1850 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1851 #endif
1852 break;
1853
1854 CASE_ITERATOR_PRIVATE_DATA_2B
1855 if (PRIVATE_DATA(cc))
1856 private_data_length += 2;
1857 cc += 2 + IMM2_SIZE;
1858 #ifdef SUPPORT_UNICODE
1859 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1860 #endif
1861 break;
1862
1863 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1864 if (PRIVATE_DATA(cc))
1865 private_data_length++;
1866 cc += 1;
1867 break;
1868
1869 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1870 if (PRIVATE_DATA(cc))
1871 private_data_length += 2;
1872 cc += 1;
1873 break;
1874
1875 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1876 if (PRIVATE_DATA(cc))
1877 private_data_length += 2;
1878 cc += 1 + IMM2_SIZE;
1879 break;
1880
1881 case OP_CLASS:
1882 case OP_NCLASS:
1883 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
1884 case OP_XCLASS:
1885 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
1886 #else
1887 size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
1888 #endif
1889 if (PRIVATE_DATA(cc))
1890 private_data_length += get_class_iterator_size(cc + size);
1891 cc += size;
1892 break;
1893
1894 default:
1895 cc = next_opcode(common, cc);
1896 SLJIT_ASSERT(cc != NULL);
1897 break;
1898 }
1899 }
1900 SLJIT_ASSERT(cc == ccend);
1901 return private_data_length;
1902 }
1903
/* Emits the code that copies the private data words of the opcodes between
cc and ccend to the stack (save is TRUE) or back from it (save is FALSE).

The private data lives in locals addressed relative to SLJIT_SP; the stack
words are addressed relative to STACK_TOP, from the offset derived from
stackptr up to the one derived from stacktop. The opcodes are visited in the
same way as in get_private_data_copy_length().

When saving, common->recursive_head_ptr and, if needs_control_head is set,
common->control_head_ptr are saved before the opcode data. When restoring,
the stack words belonging to them are stepped over instead.

Every word travels through TMP1 or TMP2. The two registers are used
alternately (tmp1next tells which one is next) and tmp1empty / tmp2empty
tell whether a register currently holds a word that still has to be written
out. The statement order below is therefore significant. */
copy_private_data(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,BOOL save,int stackptr,int stacktop,BOOL needs_control_head)1904 static void copy_private_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
1905 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1906 {
1907 DEFINE_COMPILER;
/* srcw holds the SLJIT_SP relative offsets collected for one step (at most
two), count tells how many of them are valid. */
1908 int srcw[2];
1909 int count, size;
1910 BOOL tmp1next = TRUE;
1911 BOOL tmp1empty = TRUE;
1912 BOOL tmp2empty = TRUE;
1913 PCRE2_SPTR alternative;
/* start: collect the head pointers (save mode only); loop: visit the
opcodes; end: nothing is left to visit. */
1914 enum {
1915 start,
1916 loop,
1917 end
1918 } status;
1919
1920 status = save ? start : loop;
1921 stackptr = STACK(stackptr - 2);
1922 stacktop = STACK(stacktop - 1);
1923
1924 if (!save)
1925 {
/* Step over the words of the head pointers, then preload up to two stack
words so that the restore loop always finds its next value in a register. */
1926 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1927 if (stackptr < stacktop)
1928 {
1929 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1930 stackptr += sizeof(sljit_sw);
1931 tmp1empty = FALSE;
1932 }
1933 if (stackptr < stacktop)
1934 {
1935 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1936 stackptr += sizeof(sljit_sw);
1937 tmp2empty = FALSE;
1938 }
1939 /* The tmp1next must be TRUE in either way. */
1940 }
1941
/* Each iteration collects the offsets of one step into srcw / count and
then emits the moves for them. */
1942 do
1943 {
1944 count = 0;
1945 switch(status)
1946 {
1947 case start:
1948 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1949 count = 1;
1950 srcw[0] = common->recursive_head_ptr;
1951 if (needs_control_head)
1952 {
1953 SLJIT_ASSERT(common->control_head_ptr != 0);
1954 count = 2;
1955 srcw[1] = common->control_head_ptr;
1956 }
1957 status = loop;
1958 break;
1959
1960 case loop:
1961 if (cc >= ccend)
1962 {
1963 status = end;
1964 break;
1965 }
1966
1967 switch(*cc)
1968 {
1969 case OP_KET:
/* A KET with private data closes a detected repeat; the entry after it
holds the distance to skip. */
1970 if (PRIVATE_DATA(cc) != 0)
1971 {
1972 count = 1;
1973 srcw[0] = PRIVATE_DATA(cc);
1974 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1975 cc += PRIVATE_DATA(cc + 1);
1976 }
1977 cc += 1 + LINK_SIZE;
1978 break;
1979
1980 case OP_ASSERT:
1981 case OP_ASSERT_NOT:
1982 case OP_ASSERTBACK:
1983 case OP_ASSERTBACK_NOT:
1984 case OP_ONCE:
1985 case OP_ONCE_NC:
1986 case OP_BRAPOS:
1987 case OP_SBRA:
1988 case OP_SBRAPOS:
1989 case OP_SCOND:
1990 count = 1;
1991 srcw[0] = PRIVATE_DATA(cc);
1992 SLJIT_ASSERT(srcw[0] != 0);
1993 cc += 1 + LINK_SIZE;
1994 break;
1995
1996 case OP_CBRA:
1997 case OP_SCBRA:
/* Only groups with a zero optimized_cbracket entry have a word to copy. */
1998 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1999 {
2000 count = 1;
2001 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
2002 }
2003 cc += 1 + LINK_SIZE + IMM2_SIZE;
2004 break;
2005
2006 case OP_CBRAPOS:
2007 case OP_SCBRAPOS:
2008 count = 2;
2009 srcw[0] = PRIVATE_DATA(cc);
2010 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
2011 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
2012 cc += 1 + LINK_SIZE + IMM2_SIZE;
2013 break;
2014
2015 case OP_COND:
2016 /* Might be a hidden SCOND. */
2017 alternative = cc + GET(cc, 1);
2018 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2019 {
2020 count = 1;
2021 srcw[0] = PRIVATE_DATA(cc);
2022 SLJIT_ASSERT(srcw[0] != 0);
2023 }
2024 cc += 1 + LINK_SIZE;
2025 break;
2026
2027 CASE_ITERATOR_PRIVATE_DATA_1
2028 if (PRIVATE_DATA(cc))
2029 {
2030 count = 1;
2031 srcw[0] = PRIVATE_DATA(cc);
2032 }
2033 cc += 2;
2034 #ifdef SUPPORT_UNICODE
2035 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2036 #endif
2037 break;
2038
2039 CASE_ITERATOR_PRIVATE_DATA_2A
/* Two word iterators use two consecutive locals. */
2040 if (PRIVATE_DATA(cc))
2041 {
2042 count = 2;
2043 srcw[0] = PRIVATE_DATA(cc);
2044 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2045 }
2046 cc += 2;
2047 #ifdef SUPPORT_UNICODE
2048 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2049 #endif
2050 break;
2051
2052 CASE_ITERATOR_PRIVATE_DATA_2B
2053 if (PRIVATE_DATA(cc))
2054 {
2055 count = 2;
2056 srcw[0] = PRIVATE_DATA(cc);
2057 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2058 }
2059 cc += 2 + IMM2_SIZE;
2060 #ifdef SUPPORT_UNICODE
2061 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2062 #endif
2063 break;
2064
2065 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2066 if (PRIVATE_DATA(cc))
2067 {
2068 count = 1;
2069 srcw[0] = PRIVATE_DATA(cc);
2070 }
2071 cc += 1;
2072 break;
2073
2074 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2075 if (PRIVATE_DATA(cc))
2076 {
2077 count = 2;
2078 srcw[0] = PRIVATE_DATA(cc);
2079 srcw[1] = srcw[0] + sizeof(sljit_sw);
2080 }
2081 cc += 1;
2082 break;
2083
2084 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2085 if (PRIVATE_DATA(cc))
2086 {
2087 count = 2;
2088 srcw[0] = PRIVATE_DATA(cc);
2089 srcw[1] = srcw[0] + sizeof(sljit_sw);
2090 }
2091 cc += 1 + IMM2_SIZE;
2092 break;
2093
2094 case OP_CLASS:
2095 case OP_NCLASS:
2096 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
2097 case OP_XCLASS:
2098 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2099 #else
2100 size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2101 #endif
/* The repeat opcode follows the class data. A class that owns private data
must have a repeat that needs one or two words. */
2102 if (PRIVATE_DATA(cc))
2103 switch(get_class_iterator_size(cc + size))
2104 {
2105 case 1:
2106 count = 1;
2107 srcw[0] = PRIVATE_DATA(cc);
2108 break;
2109
2110 case 2:
2111 count = 2;
2112 srcw[0] = PRIVATE_DATA(cc);
2113 srcw[1] = srcw[0] + sizeof(sljit_sw);
2114 break;
2115
2116 default:
2117 SLJIT_ASSERT_STOP();
2118 break;
2119 }
2120 cc += size;
2121 break;
2122
2123 default:
2124 cc = next_opcode(common, cc);
2125 SLJIT_ASSERT(cc != NULL);
2126 break;
2127 }
2128 break;
2129
2130 case end:
2131 SLJIT_ASSERT_STOP();
2132 break;
2133 }
2134
/* The collected offsets are processed from the last to the first, i.e.
srcw[1] before srcw[0]. */
2135 while (count > 0)
2136 {
2137 count--;
2138 if (save)
2139 {
/* Save: write out the word the chosen register still holds (if any), then
load the next local into it. */
2140 if (tmp1next)
2141 {
2142 if (!tmp1empty)
2143 {
2144 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2145 stackptr += sizeof(sljit_sw);
2146 }
2147 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2148 tmp1empty = FALSE;
2149 tmp1next = FALSE;
2150 }
2151 else
2152 {
2153 if (!tmp2empty)
2154 {
2155 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2156 stackptr += sizeof(sljit_sw);
2157 }
2158 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2159 tmp2empty = FALSE;
2160 tmp1next = TRUE;
2161 }
2162 }
2163 else
2164 {
/* Restore: store the preloaded word into the local, then refill the
register from the stack unless the end of the area has been reached. */
2165 if (tmp1next)
2166 {
2167 SLJIT_ASSERT(!tmp1empty);
2168 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
2169 tmp1empty = stackptr >= stacktop;
2170 if (!tmp1empty)
2171 {
2172 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2173 stackptr += sizeof(sljit_sw);
2174 }
2175 tmp1next = FALSE;
2176 }
2177 else
2178 {
2179 SLJIT_ASSERT(!tmp2empty);
2180 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
2181 tmp2empty = stackptr >= stacktop;
2182 if (!tmp2empty)
2183 {
2184 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2185 stackptr += sizeof(sljit_sw);
2186 }
2187 tmp1next = TRUE;
2188 }
2189 }
2190 }
2191 }
2192 while (status != end);
2193
/* Save mode: write out the words still held in the registers. The register
that would be used next holds the older word, so it goes first. */
2194 if (save)
2195 {
2196 if (tmp1next)
2197 {
2198 if (!tmp1empty)
2199 {
2200 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2201 stackptr += sizeof(sljit_sw);
2202 }
2203 if (!tmp2empty)
2204 {
2205 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2206 stackptr += sizeof(sljit_sw);
2207 }
2208 }
2209 else
2210 {
2211 if (!tmp2empty)
2212 {
2213 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2214 stackptr += sizeof(sljit_sw);
2215 }
2216 if (!tmp1empty)
2217 {
2218 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2219 stackptr += sizeof(sljit_sw);
2220 }
2221 }
2222 }
/* All opcodes were visited and the whole stack area was used; after a
restore both registers must have been consumed. */
2223 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
2224 }
2225
set_then_offsets(compiler_common * common,PCRE2_SPTR cc,sljit_u8 * current_offset)2226 static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
2227 {
2228 PCRE2_SPTR end = bracketend(cc);
2229 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2230
2231 /* Assert captures then. */
2232 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2233 current_offset = NULL;
2234 /* Conditional block does not. */
2235 if (*cc == OP_COND || *cc == OP_SCOND)
2236 has_alternatives = FALSE;
2237
2238 cc = next_opcode(common, cc);
2239 if (has_alternatives)
2240 current_offset = common->then_offsets + (cc - common->start);
2241
2242 while (cc < end)
2243 {
2244 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2245 cc = set_then_offsets(common, cc, current_offset);
2246 else
2247 {
2248 if (*cc == OP_ALT && has_alternatives)
2249 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2250 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2251 *current_offset = 1;
2252 cc = next_opcode(common, cc);
2253 }
2254 }
2255
2256 return end;
2257 }
2258
/* The case-label list macros are not used below this point. */
2259 #undef CASE_ITERATOR_PRIVATE_DATA_1
2260 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2261 #undef CASE_ITERATOR_PRIVATE_DATA_2B
2262 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2263 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2264 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2265
is_powerof2(unsigned int value)2266 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2267 {
2268 return (value & (value - 1)) == 0;
2269 }
2270
set_jumps(jump_list * list,struct sljit_label * label)2271 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2272 {
2273 while (list)
2274 {
2275 /* sljit_set_label is clever enough to do nothing
2276 if either the jump or the label is NULL. */
2277 SET_LABEL(list->jump, label);
2278 list = list->next;
2279 }
2280 }
2281
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)2282 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2283 {
2284 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2285 if (list_item)
2286 {
2287 list_item->next = *list;
2288 list_item->jump = jump;
2289 *list = list_item;
2290 }
2291 }
2292
/* Records a stub at the head of common->stubs: start is the jump that
leads to the stub and the label created at the current position is where
the stub continues afterwards (see flush_stubs). Nothing is recorded, and
no label is created, when sljit_alloc_memory() returns NULL. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (stub == NULL)
  return;

stub->start = start;
stub->quit = LABEL();
stub->next = common->stubs;
common->stubs = stub;
}
2306
/* Emits the out-of-line code for every pending stub and then empties the
stub list. For each stub: its start jump is bound to the current position,
a fast call is queued on common->stackalloc, and control jumps back to the
stub's quit label. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
2321
/* Places a label at the current code position and remembers it together
with update_addr on the common->label_addrs list. Does nothing when the
list node cannot be allocated. */
static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
{
DEFINE_COMPILER;
label_addr_list *entry = sljit_alloc_memory(compiler, sizeof(label_addr_list));

if (entry != NULL)
  {
  entry->label = LABEL();
  entry->update_addr = update_addr;
  entry->next = common->label_addrs;
  common->label_addrs = entry;
  }
}
2335
count_match(compiler_common * common)2336 static SLJIT_INLINE void count_match(compiler_common *common)
2337 {
2338 DEFINE_COMPILER;
2339
2340 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2341 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2342 }
2343
allocate_stack(compiler_common * common,int size)2344 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2345 {
2346 /* May destroy all locals and registers except TMP2. */
2347 DEFINE_COMPILER;
2348
2349 SLJIT_ASSERT(size > 0);
2350 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2351 #ifdef DESTROY_REGISTERS
2352 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2353 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2354 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2355 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2356 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2357 #endif
2358 add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2359 }
2360
static SLJIT_INLINE void free_stack(compiler_common *common, int size)
{
/* Emits code that lowers STACK_TOP by size machine words (sljit_sw).
size must be positive. No limit check is emitted. */
DEFINE_COMPILER;

SLJIT_ASSERT(size > 0);
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
}
2368
/* Allocates size bytes of data storage, preceded by one sljit_uw sized
header that links the chunk into common->read_only_data_head.

Returns a pointer to the usable area just after the header, or NULL when
the compiler is already in an error state or the allocation fails; in the
latter case the compiler's memory error is set. */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *chunk;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

chunk = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
if (SLJIT_UNLIKELY(chunk == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

/* The first word of each chunk holds the previous list head. */
*(void**)chunk = common->read_only_data_head;
common->read_only_data_head = (void *)chunk;

return chunk + 1;
}
2388
reset_ovector(compiler_common * common,int length)2389 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2390 {
2391 DEFINE_COMPILER;
2392 struct sljit_label *loop;
2393 sljit_s32 i;
2394
2395 /* At this point we can freely use all temporary registers. */
2396 SLJIT_ASSERT(length > 1);
2397 /* TMP1 returns with begin - 1. */
2398 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2399 if (length < 8)
2400 {
2401 for (i = 1; i < length; i++)
2402 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2403 }
2404 else
2405 {
2406 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2407 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2408 loop = LABEL();
2409 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2410 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2411 JUMPTO(SLJIT_NOT_ZERO, loop);
2412 }
2413 }
2414
reset_fast_fail(compiler_common * common)2415 static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
2416 {
2417 DEFINE_COMPILER;
2418 sljit_s32 i;
2419
2420 SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
2421
2422 OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2423 for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
2424 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
2425 }
2426
static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
{
/* Emits code that resets the per-attempt match state:
  - OVECTOR(2) .. OVECTOR(length - 1) are overwritten with the value
    currently stored in OVECTOR(1);
  - the mark and control head locals are zeroed when they are in use;
  - STACK_TOP is reloaded from arguments->stack->base;
  - TMP1 is loaded from the start_ptr local.
length must be greater than one. TMP2 is used when length >= 8. */
DEFINE_COMPILER;
struct sljit_label *loop;
int i;

SLJIT_ASSERT(length > 1);
/* OVECTOR(1) contains the "string begin - 1" constant. */
if (length > 2)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
if (length < 8)
  {
  /* Short vectors: one store per slot. */
  for (i = 2; i < length; i++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
  }
else
  {
  /* Longer vectors: a counted loop of length - 2 stores. STACK_TOP is
  borrowed as the loop counter; it is reloaded below.
  NOTE(review): SLJIT_MOVU presumably updates TMP2 by the given offset as
  part of each store - confirm against the sljit documentation. */
  GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
  loop = LABEL();
  OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
  OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, loop);
  }

/* STACK_TOP temporarily holds the arguments pointer while it is chased
through arguments->stack to the stack base. */
OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
if (common->control_head_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
}
2461
do_search_mark(sljit_sw * current,PCRE2_SPTR skip_arg)2462 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
2463 {
2464 while (current != NULL)
2465 {
2466 switch (current[-2])
2467 {
2468 case type_then_trap:
2469 break;
2470
2471 case type_mark:
2472 if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[-3]) == 0)
2473 return current[-4];
2474 break;
2475
2476 default:
2477 SLJIT_ASSERT_STOP();
2478 break;
2479 }
2480 SLJIT_ASSERT(current > (sljit_sw*)current[-1]);
2481 current = (sljit_sw*)current[-1];
2482 }
2483 return -1;
2484 }
2485
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
/* Emits the code that publishes a successful match:
  - STR_PTR is stored into OVECTOR(1) (the previous content is kept in
    SLJIT_S2 for the scan at the end);
  - the start_ptr local is written to arguments->startchar_ptr and, when
    marks are in use, the mark local to arguments->mark_ptr;
  - arguments->oveccount local ovector slots are converted from pointers
    to offsets relative to arguments->begin and stored into the ovector
    of arguments->match_data;
  - the return register receives the count computed by the final scan
    (or the constant 1 when topbracket <= 1). */
DEFINE_COMPILER;
struct sljit_label *loop;

/* At this point we can freely use all registers. */
OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);

OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
/* R1 = number of slots to copy (loop counter). */
OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
/* R2 = address one PCRE2_SIZE before the output ovector; the stores in the
loop below use an offset of sizeof(PCRE2_SIZE) from R2.
NOTE(review): SLJIT_MOVU presumably advances R2 with each store - confirm
against the sljit documentation. */
OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
  SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));

/* S0 = read cursor over the local ovector, R0 = subject begin. */
GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));

loop = LABEL();
OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
/* Copy the integer value to the output buffer */
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
/* Convert the byte distance into a code unit count. */
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
if (sizeof(PCRE2_SIZE) == 4)
  OP1(SLJIT_MOVU_U32, SLJIT_MEM1(SLJIT_R2), sizeof(PCRE2_SIZE), SLJIT_S1, 0);
else
  OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R2), sizeof(PCRE2_SIZE), SLJIT_S1, 0);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);

/* Calculate the return value, which is the maximum ovector value. */
if (topbracket > 1)
  {
  /* Scan the local ovector downwards in steps of two slots, starting from
  the pair of the highest bracket, decrementing R1 for every slot that
  still holds the value saved in S2. */
  GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

  /* OVECTOR(0) is never equal to SLJIT_S2. */
  loop = LABEL();
  OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
  OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
  CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
  }
else
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
}
2540
static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
{
/* Emits the code that reports a partial match and jumps to quit:
  - the return register is set to PCRE2_ERROR_PARTIAL;
  - the partial match start (the hit_start local in soft partial mode,
    the start_ptr local otherwise) is written to arguments->startchar_ptr;
  - ovector[0] and ovector[1] of arguments->match_data receive the offsets
    of that start and of STR_END, both relative to arguments->begin.
STR_END is overwritten by the emitted code. */
DEFINE_COMPILER;
sljit_s32 mov_opcode;

SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
  && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));

OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
  common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);

/* Store match begin and end. */
OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, match_data));

/* The store width follows the size of PCRE2_SIZE. */
mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;

/* ovector[0] = start - begin, converted to code units where needed. */
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);

/* ovector[1] = STR_END - begin, converted to code units where needed. */
OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S0, 0);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);

JUMPTO(SLJIT_JUMP, quit);
}
2576
check_start_used_ptr(compiler_common * common)2577 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2578 {
2579 /* May destroy TMP1. */
2580 DEFINE_COMPILER;
2581 struct sljit_jump *jump;
2582
2583 if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
2584 {
2585 /* The value of -1 must be kept for start_used_ptr! */
2586 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2587 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2588 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2589 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2590 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2591 JUMPHERE(jump);
2592 }
2593 else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
2594 {
2595 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2596 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2597 JUMPHERE(jump);
2598 }
2599 }
2600
static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
{
/* Detects if the character has an othercase.

cc points to the character in the compiled pattern. In UTF mode the whole
character is decoded and values above 127 are checked with UCD_OTHERCASE;
everything else is looked up in the common->fcc table, and values that do
not satisfy MAX_255 are reported as having no othercase. */
unsigned int c;

#ifdef SUPPORT_UNICODE
if (common->utf)
  {
  GETCHAR(c, cc);
  if (c > 127)
    {
    return c != UCD_OTHERCASE(c);
    }
#if PCRE2_CODE_UNIT_WIDTH != 8
  return common->fcc[c] != c;
#endif
  /* In 8-bit builds, characters <= 127 fall through to the table lookup
  at the end of the function. */
  }
else
#endif
  c = *cc;
return MAX_255(c) ? common->fcc[c] != c : FALSE;
}
2623
static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
{
/* Returns with the othercase.

In UTF mode characters above 127 are mapped with UCD_OTHERCASE. All other
characters go through TABLE_GET on the common->fcc table, with c itself
passed as the fallback value. */
#ifdef SUPPORT_UNICODE
if (common->utf && c > 127)
  {
  return UCD_OTHERCASE(c);
  }
#endif
return TABLE_GET(c, common->fcc, c);
}
2635
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
/* Detects if the character and its othercase has only 1 bit difference.

cc points to a character that is known to have an othercase (asserted).

Returns 0 when the two cases differ in more than one bit, or when the
difference cannot be described by the encoding below. Otherwise returns
(position << 8) | bit, where bit is the differing bit moved into the low
8 bits:
  - 8-bit code units: position is the index of the code unit holding the
    bit;
  - 16/32-bit code units: position is 0/1 for the low/high byte of the
    first code unit, and (16-bit only) 2/3 for the low/high byte of the
    second code unit of a surrogate pair. */
unsigned int c, oc, bit;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int n;
#endif

#ifdef SUPPORT_UNICODE
if (common->utf)
  {
  GETCHAR(c, cc);
  if (c <= 127)
    oc = common->fcc[c];
  else
    {
    oc = UCD_OTHERCASE(c);
    }
  }
else
  {
  c = *cc;
  oc = TABLE_GET(c, common->fcc, c);
  }
#else
c = *cc;
oc = TABLE_GET(c, common->fcc, c);
#endif

SLJIT_ASSERT(c != oc);

bit = c ^ oc;
/* Optimized for English alphabet. */
if (c <= 127 && bit == 0x20)
  return (0 << 8) | 0x20;

/* Since c != oc, they must have at least 1 bit difference. */
if (!is_powerof2(bit))
  return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
if (common->utf && c > 127)
  {
  /* Each UTF-8 continuation byte carries 6 payload bits: walk from the last
  byte of the sequence towards the first until the bit is reached. */
  n = GET_EXTRALEN(*cc);
  while ((bit & 0x3f) == 0)
    {
    n--;
    bit >>= 6;
    }
  return (n << 8) | bit;
  }
#endif /* SUPPORT_UNICODE */
return (0 << 8) | bit;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
/* Surrogate pairs exist only with 16-bit code units. A 32-bit code unit
always holds the whole character, so this branch must not be taken there:
it would report a position inside the following character. */
if (common->utf && c > 65535)
  {
  if (bit >= (1 << 10))
    bit >>= 10;
  else
    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */

#if PCRE2_CODE_UNIT_WIDTH == 32
/* Only the two low bytes of a code unit can be described by the return
value; report "no usable single bit" for anything above them. */
if (bit > 0xffff)
  return 0;
#endif
return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
2707
static void check_partial(compiler_common *common, BOOL force)
{
/* Checks whether a partial match has occurred. Does not modify registers.

Nothing is emitted in PCRE2_JIT_COMPLETE mode (force must be FALSE there).
Otherwise the emitted "partial match" action is:
  - soft partial mode: store 0 into the hit_start local;
  - any other mode: jump to the partial match exit (directly to
    common->partialmatchlabel when it already exists, otherwise through
    the common->partialmatch jump list).
The action is skipped at run time when
  - force is FALSE and the start_used_ptr local is >= STR_PTR, or
  - force is TRUE in soft partial mode and start_used_ptr equals -1.
With force set in any other mode the action is unconditional. */
DEFINE_COMPILER;
struct sljit_jump *jump = NULL;

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

if (!force)
  jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else
  {
  if (common->partialmatchlabel != NULL)
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  else
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  }

/* Landing point of the "skip the action" jump, when one was emitted. */
if (jump != NULL)
  JUMPHERE(jump);
}
2737
/* Emits an end-of-subject test. Jumps taken when the end has been reached
are collected on end_reached; in the partial matching modes the partial
match bookkeeping is performed on that path as well:
  - when the start_used_ptr local is >= STR_PTR, only the end_reached jump
    is taken;
  - otherwise soft partial mode stores 0 into the hit_start local and then
    jumps to end_reached, while any other mode jumps to the partial match
    exit. */
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Does not affect registers. Usually used in a tight spot. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
/* This check is shared by both partial matching modes. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
JUMPHERE(not_at_end);
}
2767
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
/* Emits an end-of-subject test whose failure path is a backtrack.

In PCRE2_JIT_COMPLETE mode a single jump to backtracks is emitted for
STR_PTR >= STR_END. In the partial matching modes, when the end has been
reached:
  - if the start_used_ptr local is >= STR_PTR, jump to backtracks;
  - otherwise soft partial mode stores 0 into the hit_start local and
    jumps to backtracks, while any other mode jumps to the partial match
    exit (common->partialmatchlabel if it exists, else the
    common->partialmatch jump list). */
DEFINE_COMPILER;
struct sljit_jump *jump;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
/* Fast path: skip everything below while there is still input left. */
jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
else
  {
  if (common->partialmatchlabel != NULL)
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  else
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  }
JUMPHERE(jump);
}
2796
static void peek_char(compiler_common *common, sljit_u32 max)
{
/* Reads the character into TMP1, keeps STR_PTR.
Does not check STR_END. TMP2 Destroyed.

max is the largest character value the caller is interested in: in UTF
mode the multi-unit decoding is not emitted when every such value fits
into a single code unit (max < 128 for UTF-8, max < 0xd800 for UTF-16),
and TMP1 then simply holds the first code unit. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif

SLJIT_UNUSED_ARG(max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  /* Step over the lead byte before the fast call queued on
  common->utfreadchar, then subtract the value left in TMP2 so that
  STR_PTR is restored.
  NOTE(review): this assumes the called routine advances STR_PTR past the
  remaining bytes and returns the sequence length in TMP2 - confirm. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  /* TMP2 contains the high surrogate. */
  /* STR_PTR still points at the high surrogate (it is never advanced in
  this function), so the low surrogate is the next code unit. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  /* (high - 0xd800 + 0x40) << 10 == ((high - 0xd800) << 10) + 0x10000. */
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
2839
2840 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
2841
/* Tells whether the character codes below 128 are enough to determine
a match against a 32 byte class bitmap: this is the case when bytes 16..31
of the bitmap (character codes 128..255) are all 0x00, or all 0xff for a
negated class (nclass). */
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
const sljit_u8 expected = nclass ? 0xff : 0;
int i;

for (i = 16; i < 32; i++)
  {
  if (bitset[i] != expected)
    return FALSE;
  }
return TRUE;
}
2858
static void read_char7_type(compiler_common *common, BOOL full_read)
{
/* Reads the precise character type of a character into TMP1, if the character
is less than 128. Otherwise it returns with zero. Does not check STR_END. The
full_read argument tells whether the remaining code units of a multi-byte
character must be skipped as well. TMP2 is destroyed. Only valid in UTF mode.

NOTE(review): the type is always loaded from the table at common->ctypes,
indexed by the first byte; the "zero for values >= 128" result therefore
relies on the table contents - confirm. */
DEFINE_COMPILER;
struct sljit_jump *jump;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (full_read)
  {
  /* For lead bytes (>= 0xc0) add the value found in utf8_table4 to STR_PTR.
  NOTE(review): presumably the number of additional bytes - confirm. */
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  JUMPHERE(jump);
  }
}
2882
2883 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
2884
static void read_char_range(compiler_common *common, sljit_u32 min, sljit_u32 max, BOOL update_str_ptr)
{
/* Reads the precise value of a character into TMP1, if the character is
between min and max (c >= min && c <= max). Otherwise it returns with a value
outside the range. Does not check STR_END.

STR_PTR is always advanced past the first code unit. In UTF mode,
update_str_ptr requests that STR_PTR is moved past the whole character even
when its value is outside the requested range; without it the function may
stop early when the range fits into a single code unit. min and max let the
emitted decoder be specialised to the sequence lengths that can produce a
value inside the range. TMP2 (and RETURN_ADDR in some UTF-8 paths) may be
destroyed. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);
SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
SLJIT_ASSERT(min <= max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128 && !update_str_ptr) return;

  /* Bytes below 0xc0 are not lead bytes: nothing more to decode. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  if (min >= 0x10000)
    {
    /* Only four byte sequences (lead byte 0xf0..0xf7) can be in range.
    Other lead bytes skip the decoding via jump2. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    /* With update_str_ptr the table value loaded into RETURN_ADDR is used to
    advance STR_PTR for every lead byte, decoded or not. */
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (min >= 0x800 && max <= 0xffff)
    {
    /* Only three byte sequences (lead byte 0xe0..0xef) can be in range. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (max >= 0x800)
    /* General case: use one of the shared decoder routines. */
    add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  else if (max < 128)
    {
    /* Reached only with update_str_ptr set (see the early return above):
    the value is not needed, only STR_PTR is advanced. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  else
    {
    /* max is in 128..0x7ff: decode as a two byte sequence. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    else
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  JUMPHERE(jump);
  }
#endif

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max >= 0x10000)
    {
    /* Full decoding: combine a high surrogate in TMP1 with the following
    low surrogate. Non-surrogate values skip the block. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
    /* TMP2 contains the high surrogate. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump);
    return;
    }

  if (max < 0xd800 && !update_str_ptr) return;

  /* Skip low surrogate if necessary. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  if (update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  /* The pair is not decoded here; 0x10000 is returned as an out-of-range
  value when a high surrogate itself could fall inside [min, max]. */
  if (max >= 0xd800)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
  JUMPHERE(jump);
  }
#endif
}
3012
static SLJIT_INLINE void read_char(compiler_common *common)
{
/* Reads a character of any value (range 0 .. READ_CHAR_MAX) with
read_char_range, asking it to always update STR_PTR. */
read_char_range(common, 0, READ_CHAR_MAX, TRUE);
}
3017
static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END.

The type comes from the table at common->ctypes; characters above 255 get
the type 0 on the paths that check for them. TMP2 is destroyed. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* This can be an extra read in some situations, but hopefully
  it is needed in most cases. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  /* Bytes below 0xc0 are done: the table value is the result. */
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
  if (!update_str_ptr)
    {
    /* Inline decoding that treats the character as a two byte sequence:
    the combined value is looked up when it is <= 255, otherwise the type
    is 0. STR_PTR is advanced past one extra byte only. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    /* Otherwise a fast call is queued on common->utfreadtype8. */
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && update_str_ptr)
  {
  /* Skip low surrogate if necessary. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
3082
static void skip_char_back(compiler_common *common)
{
/* Goes one character back. Affects STR_PTR and TMP1. Does not check begin.

Outside UTF mode (and always for 32-bit code units) this is a plain step
of one code unit. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
#if PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *label;

if (common->utf)
  {
  /* UTF-8: keep stepping back while the byte just passed has the form
  10xxxxxx (a continuation byte). */
  label = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  /* Skip low surrogate if necessary. */
  /* Branch-free form: TMP1 receives the result of the "is a low surrogate"
  comparison, which is shifted left by one and subtracted from STR_PTR.
  NOTE(review): presumably the flag value is 0 or 1, making the extra step
  0 or 2 bytes (one 16-bit code unit) - confirm. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
3117
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed.

Jumps are added to backtracks: they are taken when the character is a
newline if jumpifmatch is TRUE, and when it is not a newline otherwise.
nltype selects the newline convention (NLTYPE_ANY, NLTYPE_ANYCRLF or
NLTYPE_FIXED with a single code unit in common->newline). */
DEFINE_COMPILER;
struct sljit_jump *jump;

if (nltype == NLTYPE_ANY)
  {
  /* A fast call is queued on common->anynewline; the result is tested
  through the zero flag afterwards. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  }
else if (nltype == NLTYPE_ANYCRLF)
  {
  if (jumpifmatch)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
    }
  else
    {
    /* CR skips the second test; anything that is neither CR nor NL
    backtracks. */
    jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
    JUMPHERE(jump);
    }
  }
else
  {
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  }
}
3149
3150 #ifdef SUPPORT_UNICODE
3151
3152 #if PCRE2_CODE_UNIT_WIDTH == 8
static void do_utfreadchar(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1, length in TMP2.

This emits a shared routine (fast enter / fast return through
RETURN_ADDR). The continuation bytes are read starting at STR_PTR + 0, and
STR_PTR is advanced past them before returning. The sequence length is
found from the lead byte bits that end up at bit 11 (0x800) and bit 16
(0x10000) of the partially assembled value. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Merge the low six bits of the lead byte with the first continuation
byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* At least three bytes: clear the marker bit and append the second
continuation byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Four byte sequence. */
JUMPHERE(jump);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3200
static void do_utfreadchar16(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return value in TMP1.

Variant of the UTF-8 reader that does not report the length and only builds
the value from at most three bytes. For a four byte sequence STR_PTR is still
moved past the whole sequence, but the value left in TMP1 is not the decoded
code point: it merely has bit 0x10000 set, i.e. it is above 0xffff.
Presumably the callers only need to know that such a character is outside
the 16 bit range - TODO confirm. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = ((first & 0x3f) << 6) | (second & 0x3f) */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. Bit 0x800 is bit 0x20 of the first byte. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Bit 0x400 is bit 0x10 of the first byte (set for four byte sequences).
TMP2 becomes 1 in that case and 0 otherwise, and is added to STR_PTR as the
extra byte to skip. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
/* This code runs only in 8 bit mode. No need to shift the value. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
/* Three byte sequence (a four byte sequence has already skipped one more
byte above). */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3236
static void do_utfreadtype8(compiler_common *common)
{
/* Fast decoding a UTF-8 character type. TMP2 contains the first byte
of the character (>= 0xc0). Return value in TMP1.

The returned value is the byte loaded from the common->ctypes table for
characters below 256, and 0 for every other character. STR_PTR is advanced
past the rest of the sequence in all cases. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *compare;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Bit 0x20 of the first byte is set for sequences longer than two bytes. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point. A value above 3 means that the
character is 256 or greater, which has no table entry. */
compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
/* TMP2 = (upper bits << 6) | (second & 0x3f), then index the type table. */
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Two byte character >= 256: no type information. */
JUMPHERE(compare);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* We only have types for characters less than 256. The amount to skip is
looked up in utf8_table4, indexed by (first byte - 0xc0); presumably this is
the number of continuation bytes - TODO confirm against the table. */
JUMPHERE(jump);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3272
3273 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
3274
/* UCD_BLOCK_SIZE must be 128 (see the assert below). The mask and shift
split a character into a block number (char >> 7) and an offset inside the
block (char & 127). */
#define UCD_BLOCK_MASK 127
#define UCD_BLOCK_SHIFT 7

static void do_getucd(compiler_common *common)
{
/* Search the UCD record for the character comes in TMP1.
Returns chartype in TMP1 and UCD offset in TMP2.

Emits a fast-call helper that performs the two stage table lookup:
  block  = ucd_stage1[chr >> UCD_BLOCK_SHIFT]
  record = ucd_stage2[(block << UCD_BLOCK_SHIFT) + (chr & UCD_BLOCK_MASK)]
and then loads the chartype field of ucd_records[record]. */
DEFINE_COMPILER;

/* The shifts used for indexing below (1 for the 16 bit stage2 entries, 3 for
the records) rely on these sizes. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP2 = stage1 entry of the block containing the character. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = index into stage2. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = 16 bit stage2 entry (index scaled by 2). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* TMP1 = chartype byte of the selected record (index scaled by 8). */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3299
3300 #endif /* SUPPORT_UNICODE */
3301
static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, sljit_u32 overall_options)
{
/* Emits the entry code of the main matching loop and returns the label which
must be jumped to in order to advance STR_PTR to the next starting position.

Depending on overall_options, code is generated first which computes the
value stored in the stack slot at common->match_end_ptr: the end of the first
line (PCRE2_FIRSTLINE) or the position given by the offset limit
(PCRE2_USE_OFFSET_LIMIT). The first execution skips the advancing code with
an unconditional jump, so that matching starts at the initial STR_PTR. */
DEFINE_COMPILER;
struct sljit_label *mainloop;
struct sljit_label *newlinelabel = NULL;
struct sljit_jump *start;
struct sljit_jump *end = NULL;
struct sljit_jump *end2 = NULL;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *singlechar;
#endif
jump_list *newline = NULL;
BOOL newlinecheck = FALSE;
BOOL readuchar = FALSE;

/* The extra newline handling in the advancing code is used only when neither
hascrorlf nor PCRE2_FIRSTLINE is set, and the newline is ANY, ANYCRLF or a
two code unit sequence (common->newline > 255). */
if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
    && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
  newlinecheck = TRUE;

SLJIT_ASSERT(common->forced_quit_label == NULL);

if ((overall_options & PCRE2_FIRSTLINE) != 0)
  {
  /* Search for the end of the first line. STR_PTR is saved in TMP3 and
  restored after the search. */
  SLJIT_ASSERT(common->match_end_ptr != 0);
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two code unit newline: advance one unit at a time and compare the
    previous and the current unit with the two halves of common->newline. */
    mainloop = LABEL();
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
    JUMPHERE(end);
    /* STR_PTR is one unit past the candidate position in both exits. */
    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    {
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    mainloop = LABEL();
    /* Continual stores do not cause a data dependency. The position is
    recorded before each character is read, so the stored value is the start
    of the newline when one is found. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    read_char_range(common, common->nlmin, common->nlmax, TRUE);
    check_newlinechar(common, common->nltype, &newline, TRUE);
    CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
    JUMPHERE(end);
    /* No newline was found: the limit is the current STR_PTR. The jumps
    taken on a newline land after this store. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    set_jumps(newline, LABEL());
    }

  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  }
else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
  {
  /* Check whether offset limit is set and valid. */
  SLJIT_ASSERT(common->match_end_ptr != 0);

  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
  /* An unset limit stores STR_END (in TMP2) as the match end. */
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  /* Convert the limit from code units to a byte offset. */
#if PCRE2_CODE_UNIT_WIDTH == 16
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
#elif PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
#endif
  /* TMP2 = begin + limit, clamped to STR_END. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  JUMPHERE(end2);
  /* Quit with PCRE2_ERROR_NOMATCH when the limit is before the current
  position. */
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  add_jump(compiler, &common->forced_quit, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
  JUMPHERE(end);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
  }

/* The first entry skips the code below, which advances STR_PTR. */
start = JUMP(SLJIT_JUMP);

if (newlinecheck)
  {
  /* Reached from the main loop when the current code unit equals
  (common->newline >> 8) & 0xff. Step over it and, unless the end of the
  subject is reached, also step over the following unit when it equals
  common->newline & 0xff. */
  newlinelabel = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
  /* TMP1 = 1 if equal, 0 otherwise; scaled to the code unit size below. */
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  end2 = JUMP(SLJIT_JUMP);
  }

mainloop = LABEL();

/* Increasing the STR_PTR here requires one less jump in the most common case. */
#ifdef SUPPORT_UNICODE
if (common->utf) readuchar = TRUE;
#endif
if (newlinecheck) readuchar = TRUE;

/* The current code unit is needed in TMP1 for the newline test and for the
UTF length computation. */
if (readuchar)
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);

if (newlinecheck)
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* For a lead byte (>= 0xc0) add the value found in utf8_table4 at index
  (byte - 0xc0) to skip the rest of the character. */
  singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(singlechar);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  /* Skip one more 16 bit unit (two bytes) when the unit just passed is in
  the 0xd800-0xdbff range. */
  singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(singlechar);
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
/* The initial jump and both exits of the newline code continue here. */
JUMPHERE(start);

if (newlinecheck)
  {
  JUMPHERE(end);
  JUMPHERE(end2);
  }

return mainloop;
}
3447
3448 #define MAX_N_CHARS 16
3449 #define MAX_DIFF_CHARS 6
3450
add_prefix_char(PCRE2_UCHAR chr,PCRE2_UCHAR * chars)3451 static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, PCRE2_UCHAR *chars)
3452 {
3453 PCRE2_UCHAR i, len;
3454
3455 len = chars[0];
3456 if (len == 255)
3457 return;
3458
3459 if (len == 0)
3460 {
3461 chars[0] = 1;
3462 chars[1] = chr;
3463 return;
3464 }
3465
3466 for (i = len; i > 0; i--)
3467 if (chars[i] == chr)
3468 return;
3469
3470 if (len >= MAX_DIFF_CHARS - 1)
3471 {
3472 chars[0] = 255;
3473 return;
3474 }
3475
3476 len++;
3477 chars[len] = chr;
3478 chars[0] = len;
3479 }
3480
static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *chars, int max_chars, sljit_u32 *rec_count)
{
/* Recursive function, which scans prefix literals.

Walks the compiled pattern starting at cc and records, for each code unit
position of the subject, which code units may occur there.

Arguments:
  common      compiler state (utf flag, character tables)
  cc          current position in the compiled pattern
  chars       character sets, MAX_DIFF_CHARS entries for each position; the
                first entry of a set is its size, or 255 when the position is
                not restricted (see add_prefix_char)
  max_chars   maximum number of positions to fill
  rec_count   remaining work budget shared by all recursive calls; it is
                decremented for each processed item

Returns the number of positions filled by this call, or 0 when the budget is
exhausted. For optional items and alternatives the function calls itself on
the other path with the same chars array, so the sets are merged, and the
returned count becomes the new max_chars limit. */
BOOL last, any, class, caseless;
int len, repeat, len_save, consumed = 0;
sljit_u32 chr; /* Any unicode character. */
sljit_u8 *bytes, *bytes_end, byte;
PCRE2_SPTR alternative, cc_save, oc;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
PCRE2_UCHAR othercase[8];
#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
PCRE2_UCHAR othercase[2];
#else
PCRE2_UCHAR othercase[1];
#endif

repeat = 1;
while (TRUE)
  {
  if (*rec_count == 0)
    return 0;
  (*rec_count)--;

  /* last:     stop scanning after this item
     any:      the item matches an unrestricted character
     class:    the item is a 256 bit character class bitmap
     caseless: the literal is matched caselessly */
  last = TRUE;
  any = FALSE;
  class = FALSE;
  caseless = FALSE;

  switch (*cc)
    {
    case OP_CHARI:
    caseless = TRUE;
    /* Fall through. */
    case OP_CHAR:
    last = FALSE;
    cc++;
    break;

    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    /* Zero width assertions. */
    cc++;
    continue;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    /* Assertion groups are skipped as a whole. */
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    caseless = TRUE;
    /* Fall through. */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    /* Only the first occurrence is recorded; last stays TRUE. */
    cc++;
    break;

    case OP_EXACTI:
    caseless = TRUE;
    /* Fall through. */
    case OP_EXACT:
    repeat = GET2(cc, 1);
    last = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    caseless = TRUE;
    /* Fall through. */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    len = 1;
    cc++;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
    /* First scan the path where the optional character is absent. */
    max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
    if (max_chars == 0)
      return consumed;
    last = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    /* The end of the current alternative: jump to the next link. */
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* Scan every alternative after the first one recursively, then continue
    with the first alternative in this loop. */
    alternative = cc + GET(cc, 1);
    while (*alternative == OP_ALT)
      {
      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      alternative += GET(alternative, 1);
      }

    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    /* NOTE(review): is_char7_bitset presumably verifies that the bitmap has
    no characters above 127, which would need multiple bytes in UTF-8. */
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
      return consumed;
#endif
    class = TRUE;
    break;

    case OP_NCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    class = TRUE;
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    /* Skips the opcode; the character itself is skipped below. */
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UNICODE
    case OP_NOTPROP:
    case OP_PROP:
#if PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* The repeat count is applied to the type opcode which follows. */
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    return consumed;
    }

  if (any)
    {
    /* Mark the next "repeat" positions as unrestricted. */
    do
      {
      chars[0] = 255;

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += MAX_DIFF_CHARS;
      }
    while (--repeat > 0);

    repeat = 1;
    continue;
    }

  if (class)
    {
    /* bytes points to the 32 byte bitmap, cc to the opcode after it. */
    bytes = (sljit_u8*) (cc + 1);
    cc += 1 + 32 / sizeof(PCRE2_UCHAR);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      /* The class may match zero times: scan the path without it first. */
      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      break;

      default:
      case OP_CRPLUS:
      case OP_CRMINPLUS:
      case OP_CRPOSPLUS:
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* The minimum repeat count; nothing is recorded when it is zero. */
      repeat = GET2(cc, 1);
      if (repeat <= 0)
        return consumed;
      break;
      }

    do
      {
      /* A bitmap containing character 255 makes the position unrestricted. */
      if (bytes[31] & 0x80)
        chars[0] = 255;
      else if (chars[0] != 255)
        {
        /* Add every character whose bit is set, until the set overflows.
        chr is the character which belongs to the lowest bit of byte. */
        bytes_end = bytes + 32;
        chr = 0;
        do
          {
          byte = *bytes++;
          SLJIT_ASSERT((chr & 0x7) == 0);
          if (byte == 0)
            chr += 8;
          else
            {
            do
              {
              if ((byte & 0x1) != 0)
                add_prefix_char(chr, chars);
              byte >>= 1;
              chr++;
              }
            while (byte != 0);
            /* Round up to the first character of the next bitmap byte. */
            chr = (chr + 7) & ~7;
            }
          }
        while (chars[0] != 255 && bytes < bytes_end);
        /* Rewind to the start of the bitmap for the next repetition. */
        bytes = bytes_end - 32;
        }

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += MAX_DIFF_CHARS;
      }
    while (--repeat > 0);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      return consumed;

      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      cc++;
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* Scanning can continue only after a fixed repeat count. */
      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
        return consumed;
      cc += 1 + 2 * IMM2_SIZE;
      break;
      }

    repeat = 1;
    continue;
    }

  /* Literal character: cc points to its first code unit and len is the
  number of its code units. */
  len = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif

  if (caseless && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UNICODE
    if (common->utf)
      {
      /* The other case is usable only if its encoded form has the same
      length as the character itself. */
      GETCHAR(chr, cc);
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
        return consumed;
      }
    else
#endif
      {
      chr = *cc;
      othercase[0] = TABLE_GET(chr, common->fcc, chr);
      }
    }
  else
    {
    caseless = FALSE;
    othercase[0] = 0; /* Stops compiler warning - PH */
    }

  /* Record the code units of the character "repeat" times; each code unit
  takes one position. */
  len_save = len;
  cc_save = cc;
  while (TRUE)
    {
    oc = othercase;
    do
      {
      chr = *cc;
      add_prefix_char(*cc, chars);

      if (caseless)
        add_prefix_char(*oc, chars);

      len--;
      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += MAX_DIFF_CHARS;
      cc++;
      oc++;
      }
    while (len > 0);

    if (--repeat == 0)
      break;

    len = len_save;
    cc = cc_save;
    }

  repeat = 1;
  if (last)
    return consumed;
  }
}
3878
3879 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
3880
character_to_int32(PCRE2_UCHAR chr)3881 static sljit_s32 character_to_int32(PCRE2_UCHAR chr)
3882 {
3883 sljit_s32 value = (sljit_s32)chr;
3884 #if PCRE2_CODE_UNIT_WIDTH == 8
3885 #define SSE2_COMPARE_TYPE_INDEX 0
3886 return (value << 24) | (value << 16) | (value << 8) | value;
3887 #elif PCRE2_CODE_UNIT_WIDTH == 16
3888 #define SSE2_COMPARE_TYPE_INDEX 1
3889 return (value << 16) | value;
3890 #elif PCRE2_CODE_UNIT_WIDTH == 32
3891 #define SSE2_COMPARE_TYPE_INDEX 2
3892 return value;
3893 #else
3894 #error "Unsupported unit width"
3895 #endif
3896 }
3897
fast_forward_first_char2_sse2(compiler_common * common,PCRE2_UCHAR char1,PCRE2_UCHAR char2)3898 static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
3899 {
3900 DEFINE_COMPILER;
3901 struct sljit_label *start;
3902 struct sljit_jump *quit[3];
3903 struct sljit_jump *nomatch;
3904 sljit_u8 instruction[8];
3905 sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
3906 sljit_s32 tmp2_ind = sljit_get_register_index(TMP2);
3907 sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
3908 BOOL load_twice = FALSE;
3909 PCRE2_UCHAR bit;
3910
3911 bit = char1 ^ char2;
3912 if (!is_powerof2(bit))
3913 bit = 0;
3914
3915 if ((char1 != char2) && bit == 0)
3916 load_twice = TRUE;
3917
3918 quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3919
3920 /* First part (unaligned start) */
3921
3922 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
3923
3924 SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);
3925
3926 /* MOVD xmm, r/m32 */
3927 instruction[0] = 0x66;
3928 instruction[1] = 0x0f;
3929 instruction[2] = 0x6e;
3930 instruction[3] = 0xc0 | (2 << 3) | tmp1_ind;
3931 sljit_emit_op_custom(compiler, instruction, 4);
3932
3933 if (char1 != char2)
3934 {
3935 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
3936
3937 /* MOVD xmm, r/m32 */
3938 instruction[3] = 0xc0 | (3 << 3) | tmp1_ind;
3939 sljit_emit_op_custom(compiler, instruction, 4);
3940 }
3941
3942 /* PSHUFD xmm1, xmm2/m128, imm8 */
3943 instruction[2] = 0x70;
3944 instruction[3] = 0xc0 | (2 << 3) | 2;
3945 instruction[4] = 0;
3946 sljit_emit_op_custom(compiler, instruction, 5);
3947
3948 if (char1 != char2)
3949 {
3950 /* PSHUFD xmm1, xmm2/m128, imm8 */
3951 instruction[3] = 0xc0 | (3 << 3) | 3;
3952 instruction[4] = 0;
3953 sljit_emit_op_custom(compiler, instruction, 5);
3954 }
3955
3956 OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf);
3957 OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
3958
3959 /* MOVDQA xmm1, xmm2/m128 */
3960 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3961
3962 if (str_ptr_ind < 8)
3963 {
3964 instruction[2] = 0x6f;
3965 instruction[3] = (0 << 3) | str_ptr_ind;
3966 sljit_emit_op_custom(compiler, instruction, 4);
3967
3968 if (load_twice)
3969 {
3970 instruction[3] = (1 << 3) | str_ptr_ind;
3971 sljit_emit_op_custom(compiler, instruction, 4);
3972 }
3973 }
3974 else
3975 {
3976 instruction[1] = 0x41;
3977 instruction[2] = 0x0f;
3978 instruction[3] = 0x6f;
3979 instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
3980 sljit_emit_op_custom(compiler, instruction, 5);
3981
3982 if (load_twice)
3983 {
3984 instruction[4] = (1 << 3) | str_ptr_ind;
3985 sljit_emit_op_custom(compiler, instruction, 5);
3986 }
3987 instruction[1] = 0x0f;
3988 }
3989
3990 #else
3991
3992 instruction[2] = 0x6f;
3993 instruction[3] = (0 << 3) | str_ptr_ind;
3994 sljit_emit_op_custom(compiler, instruction, 4);
3995
3996 if (load_twice)
3997 {
3998 instruction[3] = (1 << 3) | str_ptr_ind;
3999 sljit_emit_op_custom(compiler, instruction, 4);
4000 }
4001
4002 #endif
4003
4004 if (bit != 0)
4005 {
4006 /* POR xmm1, xmm2/m128 */
4007 instruction[2] = 0xeb;
4008 instruction[3] = 0xc0 | (0 << 3) | 3;
4009 sljit_emit_op_custom(compiler, instruction, 4);
4010 }
4011
4012 /* PCMPEQB/W/D xmm1, xmm2/m128 */
4013 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
4014 instruction[3] = 0xc0 | (0 << 3) | 2;
4015 sljit_emit_op_custom(compiler, instruction, 4);
4016
4017 if (load_twice)
4018 {
4019 instruction[3] = 0xc0 | (1 << 3) | 3;
4020 sljit_emit_op_custom(compiler, instruction, 4);
4021 }
4022
4023 /* PMOVMSKB reg, xmm */
4024 instruction[2] = 0xd7;
4025 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
4026 sljit_emit_op_custom(compiler, instruction, 4);
4027
4028 if (load_twice)
4029 {
4030 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
4031 instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
4032 sljit_emit_op_custom(compiler, instruction, 4);
4033
4034 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4035 OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
4036 }
4037
4038 OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0);
4039
4040 /* BSF r32, r/m32 */
4041 instruction[0] = 0x0f;
4042 instruction[1] = 0xbc;
4043 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
4044 sljit_emit_op_custom(compiler, instruction, 3);
4045
4046 nomatch = JUMP(SLJIT_ZERO);
4047
4048 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4049 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4050 quit[1] = JUMP(SLJIT_JUMP);
4051
4052 JUMPHERE(nomatch);
4053
4054 start = LABEL();
4055 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
4056 quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4057
4058 /* Second part (aligned) */
4059
4060 instruction[0] = 0x66;
4061 instruction[1] = 0x0f;
4062
4063 /* MOVDQA xmm1, xmm2/m128 */
4064 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4065
4066 if (str_ptr_ind < 8)
4067 {
4068 instruction[2] = 0x6f;
4069 instruction[3] = (0 << 3) | str_ptr_ind;
4070 sljit_emit_op_custom(compiler, instruction, 4);
4071
4072 if (load_twice)
4073 {
4074 instruction[3] = (1 << 3) | str_ptr_ind;
4075 sljit_emit_op_custom(compiler, instruction, 4);
4076 }
4077 }
4078 else
4079 {
4080 instruction[1] = 0x41;
4081 instruction[2] = 0x0f;
4082 instruction[3] = 0x6f;
4083 instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
4084 sljit_emit_op_custom(compiler, instruction, 5);
4085
4086 if (load_twice)
4087 {
4088 instruction[4] = (1 << 3) | str_ptr_ind;
4089 sljit_emit_op_custom(compiler, instruction, 5);
4090 }
4091 instruction[1] = 0x0f;
4092 }
4093
4094 #else
4095
4096 instruction[2] = 0x6f;
4097 instruction[3] = (0 << 3) | str_ptr_ind;
4098 sljit_emit_op_custom(compiler, instruction, 4);
4099
4100 if (load_twice)
4101 {
4102 instruction[3] = (1 << 3) | str_ptr_ind;
4103 sljit_emit_op_custom(compiler, instruction, 4);
4104 }
4105
4106 #endif
4107
4108 if (bit != 0)
4109 {
4110 /* POR xmm1, xmm2/m128 */
4111 instruction[2] = 0xeb;
4112 instruction[3] = 0xc0 | (0 << 3) | 3;
4113 sljit_emit_op_custom(compiler, instruction, 4);
4114 }
4115
4116 /* PCMPEQB/W/D xmm1, xmm2/m128 */
4117 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
4118 instruction[3] = 0xc0 | (0 << 3) | 2;
4119 sljit_emit_op_custom(compiler, instruction, 4);
4120
4121 if (load_twice)
4122 {
4123 instruction[3] = 0xc0 | (1 << 3) | 3;
4124 sljit_emit_op_custom(compiler, instruction, 4);
4125 }
4126
4127 /* PMOVMSKB reg, xmm */
4128 instruction[2] = 0xd7;
4129 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
4130 sljit_emit_op_custom(compiler, instruction, 4);
4131
4132 if (load_twice)
4133 {
4134 instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
4135 sljit_emit_op_custom(compiler, instruction, 4);
4136
4137 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4138 }
4139
4140 /* BSF r32, r/m32 */
4141 instruction[0] = 0x0f;
4142 instruction[1] = 0xbc;
4143 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
4144 sljit_emit_op_custom(compiler, instruction, 3);
4145
4146 JUMPTO(SLJIT_ZERO, start);
4147
4148 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4149
4150 start = LABEL();
4151 SET_LABEL(quit[0], start);
4152 SET_LABEL(quit[1], start);
4153 SET_LABEL(quit[2], start);
4154 }
4155
4156 #undef SSE2_COMPARE_TYPE_INDEX
4157
4158 #endif
4159
fast_forward_first_char2(compiler_common * common,PCRE2_UCHAR char1,PCRE2_UCHAR char2,sljit_s32 offset)4160 static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
4161 {
4162 DEFINE_COMPILER;
4163 struct sljit_label *start;
4164 struct sljit_jump *quit;
4165 struct sljit_jump *found;
4166 PCRE2_UCHAR mask;
4167 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4168 struct sljit_label *utf_start = NULL;
4169 struct sljit_jump *utf_quit = NULL;
4170 #endif
4171 BOOL has_match_end = (common->match_end_ptr != 0);
4172
4173 if (offset > 0)
4174 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4175
4176 if (has_match_end)
4177 {
4178 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4179
4180 OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, SLJIT_IMM, IN_UCHARS(offset + 1));
4181 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
4182 if (sljit_x86_is_cmov_available())
4183 {
4184 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
4185 sljit_x86_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
4186 }
4187 #endif
4188 {
4189 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP3, 0);
4190 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4191 JUMPHERE(quit);
4192 }
4193 }
4194
4195 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4196 if (common->utf && offset > 0)
4197 utf_start = LABEL();
4198 #endif
4199
4200 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
4201
4202 /* SSE2 accelerated first character search. */
4203
4204 if (sljit_x86_is_sse2_available())
4205 {
4206 fast_forward_first_char2_sse2(common, char1, char2);
4207
4208 SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);
4209 if (common->mode == PCRE2_JIT_COMPLETE)
4210 {
4211 /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
4212 SLJIT_ASSERT(common->forced_quit_label == NULL);
4213 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
4214 add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4215
4216 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4217 if (common->utf && offset > 0)
4218 {
4219 SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
4220
4221 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
4222 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4223 #if PCRE2_CODE_UNIT_WIDTH == 8
4224 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4225 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
4226 #elif PCRE2_CODE_UNIT_WIDTH == 16
4227 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4228 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
4229 #else
4230 #error "Unknown code width"
4231 #endif
4232 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4233 }
4234 #endif
4235
4236 if (offset > 0)
4237 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4238 }
4239 else if (sljit_x86_is_cmov_available())
4240 {
4241 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
4242 sljit_x86_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, has_match_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_match_end ? common->match_end_ptr : 0);
4243 }
4244 else
4245 {
4246 quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
4247 OP1(SLJIT_MOV, STR_PTR, 0, has_match_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_match_end ? common->match_end_ptr : 0);
4248 JUMPHERE(quit);
4249 }
4250
4251 if (has_match_end)
4252 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4253 return;
4254 }
4255
4256 #endif
4257
4258 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4259
4260 start = LABEL();
4261 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4262
4263 if (char1 == char2)
4264 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
4265 else
4266 {
4267 mask = char1 ^ char2;
4268 if (is_powerof2(mask))
4269 {
4270 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
4271 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask);
4272 }
4273 else
4274 {
4275 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1);
4276 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4277 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2);
4278 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4279 found = JUMP(SLJIT_NOT_ZERO);
4280 }
4281 }
4282
4283 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4284 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, start);
4285
4286 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4287 if (common->utf && offset > 0)
4288 utf_quit = JUMP(SLJIT_JUMP);
4289 #endif
4290
4291 JUMPHERE(found);
4292
4293 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4294 if (common->utf && offset > 0)
4295 {
4296 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
4297 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4298 #if PCRE2_CODE_UNIT_WIDTH == 8
4299 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4300 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
4301 #elif PCRE2_CODE_UNIT_WIDTH == 16
4302 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4303 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
4304 #else
4305 #error "Unknown code width"
4306 #endif
4307 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4308 JUMPHERE(utf_quit);
4309 }
4310 #endif
4311
4312 JUMPHERE(quit);
4313
4314 if (has_match_end)
4315 {
4316 quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
4317 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4318 if (offset > 0)
4319 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4320 JUMPHERE(quit);
4321 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4322 }
4323
4324 if (offset > 0)
4325 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4326 }
4327
/* Emits a fast-forward scan based on the leading character positions that
scan_prefix() reports for the pattern.

Returns FALSE when nothing was emitted: scan_prefix() reported fewer than one
position, or neither a usable run of positions nor a single usable position
was found. Returns TRUE otherwise. Note that TRUE is also returned, without
emitting any code, when the skip table cannot be allocated.
NOTE(review): presumably allocate_read_only_data() records the allocation
failure elsewhere - confirm. */
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
struct sljit_jump *match;
/* chars is organised as MAX_N_CHARS groups of MAX_DIFF_CHARS entries, one
   group per pattern position. The first entry of a group is the number of
   different characters recorded for that position, the following entries are
   the characters themselves. A count of 255 represents any character. */
PCRE2_UCHAR chars[MAX_N_CHARS * MAX_DIFF_CHARS];
sljit_s32 offset;
PCRE2_UCHAR mask;
PCRE2_UCHAR *char_set, *char_set_end;
int i, max, from;
int range_right = -1, range_len;
sljit_u8 *update_table = NULL;
BOOL in_range;
sljit_u32 rec_count;

/* Reset the per-position counts before scanning the pattern. */
for (i = 0; i < MAX_N_CHARS; i++)
  chars[i * MAX_DIFF_CHARS] = 0;

rec_count = 10000;
max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);

if (max < 1)
  return FALSE;

/* Look for the longest run of consecutive positions whose count is below 255.
   A run is only accepted when it is at least 4 positions long. On success
   range_right is the last position of the run and range_len its length. */
in_range = FALSE;
/* Prevent compiler "uninitialized" warning */
from = 0;
range_len = 4 /* minimum length */ - 1;
for (i = 0; i <= max; i++)
  {
  if (in_range && (i - from) > range_len && (chars[(i - 1) * MAX_DIFF_CHARS] < 255))
    {
    range_len = i - from;
    range_right = i - 1;
    }

  if (i < max && chars[i * MAX_DIFF_CHARS] < 255)
    {
    SLJIT_ASSERT(chars[i * MAX_DIFF_CHARS] > 0);
    if (!in_range)
      {
      in_range = TRUE;
      from = i;
      }
    }
  else
    in_range = FALSE;
  }

if (range_right >= 0)
  {
  /* Build a 256 entry skip table indexed by the low 8 bits of a character.
     Every entry starts as IN_UCHARS(range_len); for each character recorded
     at distance i to the left of range_right the entry is lowered to
     IN_UCHARS(i) if that is smaller. */
  update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
  if (update_table == NULL)
    return TRUE;
  memset(update_table, IN_UCHARS(range_len), 256);

  for (i = 0; i < range_len; i++)
    {
    char_set = chars + ((range_right - i) * MAX_DIFF_CHARS);
    SLJIT_ASSERT(char_set[0] > 0 && char_set[0] < 255);
    /* char_set[0] is the count, so the characters are char_set[1..count]. */
    char_set_end = char_set + char_set[0];
    char_set++;
    while (char_set <= char_set_end)
      {
      if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
        update_table[(*char_set) & 0xff] = IN_UCHARS(i);
      char_set++;
      }
    }
  }

offset = -1;
/* Scan forward. */
/* Select one position with at most two possible characters. A position with
   a single character is preferred; among two-character positions, one whose
   characters differ in exactly one bit is preferred. */
for (i = 0; i < max; i++)
  {
  if (offset == -1)
    {
    if (chars[i * MAX_DIFF_CHARS] <= 2)
      offset = i;
    }
  else if (chars[offset * MAX_DIFF_CHARS] == 2 && chars[i * MAX_DIFF_CHARS] <= 2)
    {
    if (chars[i * MAX_DIFF_CHARS] == 1)
      offset = i;
    else
      {
      mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
      if (!is_powerof2(mask))
        {
        mask = chars[i * MAX_DIFF_CHARS + 1] ^ chars[i * MAX_DIFF_CHARS + 2];
        if (is_powerof2(mask))
          offset = i;
        }
      }
    }
  }

/* No run was found: fall back to the one or two character search at the
   selected position, if there is one. */
if (range_right < 0)
  {
  if (offset < 0)
    return FALSE;
  SLJIT_ASSERT(chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2);
  /* Works regardless the value is 1 or 2. */
  /* With a count of 1 this reads the only character again, with a count of 2
     it reads the second one. Here mask is reused as the second character. */
  mask = chars[offset * MAX_DIFF_CHARS + chars[offset * MAX_DIFF_CHARS]];
  fast_forward_first_char2(common, chars[offset * MAX_DIFF_CHARS + 1], mask, offset);
  return TRUE;
  }

/* The position already covered by the table lookup needs no extra check. */
if (range_right == offset)
  offset = -1;

SLJIT_ASSERT(offset == -1 || (chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2));

/* Lower STR_END by max - 1 code units for the duration of the scan. When a
   match end pointer is in use, the original STR_END is saved in TMP3 and
   STR_END is additionally limited to the stored match end value. */
max -= 1;
SLJIT_ASSERT(max > 0);
if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
  OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
  JUMPHERE(quit);
  }
else
  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));

SLJIT_ASSERT(range_right >= 0);

/* Except on 32 bit x86, the table address is kept in RETURN_ADDR; on 32 bit
   x86 it is used as an immediate displacement in the table load below. */
#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
#endif

start = LABEL();
quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* Load one byte of the code unit at position range_right. The second form
   addresses the last byte of that code unit instead of the first one. */
#if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
#else
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
#endif

/* Fetch the skip distance, advance by it, and retry while it is non-zero. */
#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
#else
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
#endif
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);

if (offset >= 0)
  {
  /* Verify the selected position as well. STR_PTR is advanced by one code
     unit before the comparison, so a mismatch restarts the scan at the next
     code unit. The increment is undone further below. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (chars[offset * MAX_DIFF_CHARS] == 1)
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1], start);
  else
    {
    mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
    if (is_powerof2(mask))
      {
      /* The two characters differ in one bit: force that bit and compare once. */
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1] | mask, start);
      }
    else
      {
      match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1]);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 2], start);
      JUMPHERE(match);
      }
    }
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset != 0)
  {
  /* Restart the scan when the code unit at the candidate position has the
     pattern 10xxxxxx (8 bit) or lies in 0xdc00-0xdfff (16 bit). When offset
     is non-negative STR_PTR has already been advanced above, hence the -1
     displacement. */
  if (offset < 0)
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
#if PCRE2_CODE_UNIT_WIDTH == 8
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, start);
#elif PCRE2_CODE_UNIT_WIDTH == 16
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, start);
#else
#error "Unknown code width"
#endif
  if (offset < 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }
#endif

/* Undo the one code unit advance made before the explicit comparison. */
if (offset >= 0)
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

JUMPHERE(quit);

/* Restore STR_END. With a match end pointer, STR_PTR is also limited to the
   stored match end value. */
if (common->match_end_ptr != 0)
  {
  if (range_right >= 0)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  if (range_right >= 0)
    {
    quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
    JUMPHERE(quit);
    }
  }
else
  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
return TRUE;
}
4550
4551 #undef MAX_N_CHARS
4552
fast_forward_first_char(compiler_common * common,PCRE2_UCHAR first_char,BOOL caseless)4553 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, PCRE2_UCHAR first_char, BOOL caseless)
4554 {
4555 PCRE2_UCHAR oc;
4556
4557 oc = first_char;
4558 if (caseless)
4559 {
4560 oc = TABLE_GET(first_char, common->fcc, first_char);
4561 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
4562 if (first_char > 127 && common->utf)
4563 oc = UCD_OTHERCASE(first_char);
4564 #endif
4565 }
4566
4567 fast_forward_first_char2(common, first_char, oc, 0);
4568 }
4569
/* Emits a scan that moves STR_PTR forward until it is positioned after a
newline sequence, or until the end of the scanned area is reached. Nothing is
scanned when STR_PTR is not beyond the "str" field of the jit_arguments block.

When a match end pointer is in use, STR_END is replaced by the stored match end
value for the duration of the scan; the original value is kept in TMP3 and
restored on exit. */
static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *lastchar;
struct sljit_jump *firstchar;
struct sljit_jump *quit;
struct sljit_jump *foundcr = NULL;
struct sljit_jump *notfoundnl;
jump_list *newline = NULL;

if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  }

/* Fixed newline made of two code units: the first one is held in bits 8-15
   of common->newline, the second one in bits 0-7. */
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  {
  lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

  /* Step back by one code unit, but only when STR_PTR is at least two code
     units after "begin": TMP2 becomes 0 or 1 and is scaled to the code unit
     size before the subtraction. */
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

  /* Advance one code unit at a time until the two code units just before
     STR_PTR are the newline pair, or STR_END is reached. */
  loop = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);

  JUMPHERE(quit);
  JUMPHERE(firstchar);
  JUMPHERE(lastchar);

  if (common->match_end_ptr != 0)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

/* All other newline types. Start by stepping back one character so that the
   character before the current position is examined first. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
skip_char_back(common);

loop = LABEL();
/* The loop entry is published so that other emitters can jump to it. */
common->ff_newline_shortcut = loop;

read_char_range(common, common->nlmin, common->nlmax, TRUE);
lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* Every jump collected in the newline list continues the loop.
   NOTE(review): presumably these jumps are taken when the character read is
   not a newline - confirm against check_newlinechar(). */
check_newlinechar(common, common->nltype, &newline, FALSE);
set_jumps(newline, loop);

if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  {
  quit = JUMP(SLJIT_JUMP);
  JUMPHERE(foundcr);
  /* A CR was read: if the next code unit is NL, skip it as well. TMP1 becomes
     0 or 1 and is scaled to the code unit size before it is added. */
  notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(notfoundnl);
  JUMPHERE(quit);
  }
JUMPHERE(lastchar);
JUMPHERE(firstchar);

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
4656
4657 static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
4658
/* Emits a scan that advances STR_PTR until the code unit it points to is
accepted by the start_bits bitmap, or until STR_END is reached.

When a match end pointer is in use, STR_END is replaced by the stored match end
value during the scan. The original STR_END is kept in RETURN_ADDR here, since
TMP3 is needed to preserve the character in UTF mode. */
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, const sljit_u8 *start_bits)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
struct sljit_jump *found = NULL;
jump_list *matches = NULL;
#if PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif

if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  }

start = LABEL();
quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
#ifdef SUPPORT_UNICODE
/* Keep a copy of the code unit: the tests below modify TMP1, and the UTF
   skipping code after them needs the original value. */
if (common->utf)
  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
#endif

/* First try to express the bitmap as a few range comparisons. The check is
   requested in inverted form, so the collected jumps leave the loop; they are
   bound to the exit label at the end of this function. */
if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
  {
  /* Generic bitmap test. Outside 8 bit mode, values above 255 are tested as
     255. TMP2 selects the bit (value & 7), TMP1 the byte (value >> 3). */
#if PCRE2_CODE_UNIT_WIDTH != 8
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
  JUMPHERE(jump);
#endif
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  found = JUMP(SLJIT_NOT_ZERO);
  }

/* Not accepted: move to the next character and try again. */
#ifdef SUPPORT_UNICODE
if (common->utf)
  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
#endif
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* Code units of 0xc0 and above add a further amount read from
     utf8_table4, indexed by the code unit minus 0xc0.
     NOTE(review): presumably the number of additional bytes of the
     character - confirm against the table definition. */
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  /* Code units in 0xd800-0xdbff add 2 more to STR_PTR: TMP1 becomes 0 or 1
     and is shifted left by one before it is added. */
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
#endif /* SUPPORT_UNICODE */
JUMPTO(SLJIT_JUMP, start);
/* Common exit for a bitmap hit, a range hit and the end of the subject. */
if (found != NULL)
  JUMPHERE(found);
if (matches != NULL)
  set_jumps(matches, LABEL());
JUMPHERE(quit);

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
}
4734
search_requested_char(compiler_common * common,PCRE2_UCHAR req_char,BOOL caseless,BOOL has_firstchar)4735 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
4736 {
4737 DEFINE_COMPILER;
4738 struct sljit_label *loop;
4739 struct sljit_jump *toolong;
4740 struct sljit_jump *alreadyfound;
4741 struct sljit_jump *found;
4742 struct sljit_jump *foundoc = NULL;
4743 struct sljit_jump *notfound;
4744 sljit_u32 oc, bit;
4745
4746 SLJIT_ASSERT(common->req_char_ptr != 0);
4747 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4748 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_CU_MAX);
4749 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4750 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4751
4752 if (has_firstchar)
4753 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4754 else
4755 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4756
4757 loop = LABEL();
4758 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4759
4760 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4761 oc = req_char;
4762 if (caseless)
4763 {
4764 oc = TABLE_GET(req_char, common->fcc, req_char);
4765 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
4766 if (req_char > 127 && common->utf)
4767 oc = UCD_OTHERCASE(req_char);
4768 #endif
4769 }
4770 if (req_char == oc)
4771 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4772 else
4773 {
4774 bit = req_char ^ oc;
4775 if (is_powerof2(bit))
4776 {
4777 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4778 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4779 }
4780 else
4781 {
4782 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4783 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4784 }
4785 }
4786 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4787 JUMPTO(SLJIT_JUMP, loop);
4788
4789 JUMPHERE(found);
4790 if (foundoc)
4791 JUMPHERE(foundoc);
4792 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4793 JUMPHERE(alreadyfound);
4794 JUMPHERE(toolong);
4795 return notfound;
4796 }
4797
/* Emits a fast-called helper that walks the records stored from STACK_TOP
upwards and copies saved values back into the local area.

Each record starts with a signed word:
  > 0  the word is an offset from the local base; the next two stack words
       are stored at that local and the one following it (3 words consumed)
  < 0  the negated word is an offset from the local base; the next stack
       word is stored at that local (2 words consumed)
  = 0  end of the records; the helper returns

TMP1 is the read cursor, TMP2 the current word / target address, and TMP3
holds the local base. The return address is kept in RETURN_ADDR. */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
GET_LOCAL_BASE(TMP3, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
jump = JUMP(SLJIT_SIG_LESS_EQUAL);

/* Positive word: restore two consecutive locals. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, mainloop);

JUMPHERE(jump);
/* No instruction has been emitted since the comparison above, so this jump
   still tests the flags of that comparison: negative continues below, zero
   falls through to the return. */
jump = JUMP(SLJIT_SIG_LESS);
/* End of dropping frames. */
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Negative word: restore a single local. */
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, mainloop);
}
4832
/* Emits a fast-called helper that tests whether STR_PTR is at a word boundary.

The "word" status (0 or 1) of the character before STR_PTR is stored in
LOCALS1, the status of the character at STR_PTR is computed in TMP2. Either
status stays 0 when there is no such character. The final instruction XORs the
two values with flag setting, so the result is non-zero exactly when the two
statuses differ. The return address is kept in LOCALS0. */
static void check_wordboundary(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
struct sljit_jump *jump;
#endif

/* The non-UCP paths below extract the word flag with a shift by 4. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to LOCALS1. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
/* At (or before) the beginning of the subject there is no previous char. */
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
skip_char_back(common);
check_start_used_ptr(common);
read_char(common);

/* Testing char type. */
#ifdef SUPPORT_UNICODE
if (common->use_ucp)
  {
  /* Underscore is a word character; otherwise the value delivered by the
     getucd helper is tested against the ranges ucp_Ll..ucp_Lu and
     ucp_Nd..ucp_No with two unsigned range checks.
     NOTE(review): presumably getucd leaves the character type in TMP1 -
     confirm against the helper. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
  }
else
#endif
  {
  /* Table based test: characters above 255 skip the lookup, which leaves
     LOCALS1 at zero. */
#if PCRE2_CODE_UNIT_WIDTH != 8
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  /* Here LOCALS1 has already been zeroed. */
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
JUMPHERE(skipread);

/* Status of the current character, computed in TMP2. It stays zero when the
   end of the subject is detected by check_str_end(). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common, READ_CHAR_MAX);

/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UNICODE
if (common->use_ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  }
else
#endif
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
set_jumps(skipread_list, LABEL());

/* Boundary if and only if the two statuses differ. */
OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
}
4941
/* Tries to replace a 256 bit character class bitmap test on TMP1 with a few
compare instructions.

Arguments:
  common      the compiler state
  bits        the class bitmap (bit i of the bitmap belongs to character i)
  nclass      decides whether a closing boundary at 256 is recorded after the
                bitmap scan: it is added when the last bit is 0 and nclass is
                set, or when the last bit is 1 and nclass is not set
  invert      when set, the meaning of the bitmap bits is inverted
  backtracks  jump list that receives a jump for every rejected character,
                that is, a character whose (possibly inverted) bit is zero

Returns TRUE when the test was emitted. Returns FALSE, without emitting
anything, when the bitmap has more than four value changes. */
static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
DEFINE_COMPILER;
int ranges[MAX_RANGE_SIZE];
sljit_u8 bit, cbit, all;
int i, byte, length = 0;

bit = bits[0] & 0x1;
/* All bits will be zero or one (since bit is zero or one). */
all = -bit;

/* Record in ranges[] every character value at which the bitmap bit changes.
   Whole bytes that equal the current value (0x00 or 0xff) are skipped at
   once. */
for (i = 0; i < 256; )
  {
  byte = i >> 3;
  if ((i & 0x7) == 0 && bits[byte] == all)
    i += 8;
  else
    {
    cbit = (bits[byte] >> (i & 0x7)) & 0x1;
    if (cbit != bit)
      {
      if (length >= MAX_RANGE_SIZE)
        return FALSE;
      ranges[length] = i;
      length++;
      bit = cbit;
      all = -cbit;
      }
    i++;
    }
  }

/* Add a final boundary at 256 when the last bit value and nclass require it. */
if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
  {
  if (length >= MAX_RANGE_SIZE)
    return FALSE;
  ranges[length] = 256;
  length++;
  }

/* Only up to four boundaries are handled by the code below. */
if (length < 0 || length > 4)
  return FALSE;

/* From here on, bit is the (possibly inverted) value of characters below
   ranges[0]; the value alternates at every boundary. */
bit = bits[0] & 0x1;
if (invert) bit ^= 0x1;

/* No character is accepted. */
if (length == 0 && bit == 0)
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

switch(length)
  {
  case 0:
  /* When bit != 0, all characters are accepted. */
  return TRUE;

  case 1:
  /* A single boundary: one compare against it. */
  add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 2:
  /* One interval [ranges[0], ranges[1]): a subtract and compare, or a plain
     equality test when the interval holds a single character. */
  if (ranges[0] + 1 != ranges[1])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
    }
  else
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 3:
  if (bit != 0)
    {
    /* Rejected: everything from ranges[2] upwards and [ranges[0], ranges[1]). */
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
    return TRUE;
    }

  /* Rejected: everything below ranges[0] and [ranges[1], ranges[2]). */
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
  return TRUE;

  case 4:
  /* Two intervals of equal length whose start values differ in exactly one
     bit: setting that bit in TMP1 maps the first interval onto the second,
     so a single interval test is enough. */
  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
      && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
      && (ranges[1] & (ranges[2] - ranges[0])) == 0
      && is_powerof2(ranges[2] - ranges[0]))
    {
    SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    return TRUE;
    }

  if (bit != 0)
    {
    /* Rejected: [ranges[0], ranges[1]) and [ranges[2], ranges[3]). The
       variable i tracks how much has already been subtracted from TMP1. */
    i = 0;
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      i = ranges[0];
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));

    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
    return TRUE;
    }

  /* Rejected: everything outside [ranges[0], ranges[3]) and the gap
     [ranges[1], ranges[2]). TMP1 stays biased by ranges[0] after the first
     subtraction. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
  return TRUE;

  default:
  SLJIT_ASSERT_STOP();
  return FALSE;
  }
}
5093
/* Emits a fast-called helper that tests the character in TMP1 against the
"any newline" set: 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 in the 16 and 32
bit libraries and in 8 bit UTF mode. */
static void check_anynewline(compiler_common *common)
{
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
/* TMP1 is modified as well. The result is accumulated in TMP2 (non-zero when
   a newline was recognised) and the last OR also sets the flags. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* After subtracting 0x0a, an unsigned "<= 3" covers 0x0a..0x0d. All later
   constants are therefore given relative to 0x0a. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  /* Record the 0x85 result, then set the lowest bit of the biased value so
     that 0x2028 and 0x2029 are both caught by one equality test. */
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the result of the last comparison and set the flags for the caller. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
5120
/* Emits a fast-called helper that tests the character in TMP1 against the
horizontal space set: 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e, 0x2000-0x200a,
0x202f, 0x205f and 0x3000 in the 16 and 32 bit libraries and in 8 bit UTF
mode. */
static void check_hspace(compiler_common *common)
{
/* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
/* The result is accumulated in TMP2 (non-zero when a horizontal space was
   recognised) and the last OR also sets the flags. TMP1 is modified in the
   wide character part. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  /* From here on TMP1 is biased by 0x2000: an unsigned "<= 0x0a" covers
     0x2000..0x200a, and the remaining constants are relative to 0x2000. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the result of the last comparison and set the flags for the caller. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
5159
/* Emits a fast-call subroutine that tests whether the character held in TMP1
is a vertical space. The answer is accumulated in TMP2 (non-zero when it is)
and the final OP_FLAGS is emitted with SLJIT_SET_E so the status flags reflect
TMP2 on return. TMP1 is modified too: it is biased by 0x0a and, on the
wide-character path, has its lowest bit forced on.

Characters accepted: 0x0a..0x0d and 0x85; additionally 0x2028 and 0x2029 when
the code units are 16 or 32 bits wide, or in 8-bit mode when common->utf is
set. */
static void check_vspace(compiler_common *common)
{
/* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Bias by 0x0a so that a single unsigned compare covers 0x0a..0x0d. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
/* Compare with 0x85 (NEL); the result is consumed by the next OP_FLAGS,
whichever preprocessor/runtime path is taken. */
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  /* NOTE(review): SLJIT_SET_E looks redundant here - the flags are set again
  by the SUB two statements below before anything reads them, and the
  corresponding line in check_anynewline omits it. Confirm before changing. */
  OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  /* 0x2028 - 0x0a is even and 0x2029 - 0x0a is the next odd value, so forcing
  the low bit on lets one compare accept both 0x2028 and 0x2029. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Fold in the last comparison and leave the flags describing TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
5187
/* The comparison subroutines below borrow STR_END and STACK_TOP as scratch
registers for the two code units being compared. Each subroutine saves the
original register contents before use and restores them before returning. */
#define CHAR1 STR_END
#define CHAR2 STACK_TOP

/* Emits a fast-call subroutine that compares two code unit sequences exactly.

As used by the emitted code: TMP1 points to the first sequence, STR_PTR points
just past the second one, and TMP2 holds the length in bytes (it is subtracted
from STR_PTR and counted down by IN_UCHARS(1) per iteration). The loop stops at
the first differing unit or when TMP2 reaches zero. STR_PTR is left one unit
past the last unit that was read from the second sequence. No flag-setting
operation is emitted after the loop, so presumably the caller inspects the
flags (or TMP2) to tell a full match from a mismatch - TODO confirm against the
call sites. */
static void do_casefulcmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Rewind STR_PTR to the start of the second sequence. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Preserve the registers aliased by CHAR1 and CHAR2. */
OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
/* Step both pointers back by one unit: the loads in the loop address
[pointer + IN_UCHARS(1)]. MOVU_UCHAR is presumably the load-with-pointer-update
form, so each load also advances its base register - TODO confirm. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

label = LABEL();
OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Leave the loop on the first difference. */
jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
/* Count down the remaining length and loop until it reaches zero. */
OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);

JUMPHERE(jump);
/* Undo the one-unit bias applied to STR_PTR before the loop. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* Restore the borrowed registers. */
OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
5217
/* do_caselesscmp borrows STACK_LIMIT to hold the address of the lower-casing
table (common->lcc); the original value is saved and restored around the
loop. */
#define LCC_TABLE STACK_LIMIT

/* Emits a fast-call subroutine that compares two code unit sequences while
ignoring case, using the byte table common->lcc.

Register usage mirrors do_casefulcmp: TMP1 points to the first sequence,
STR_PTR points just past the second one, and TMP2 holds the length in bytes.
Each unit is mapped through the table before the comparison; when the code unit
width is not 8 bits, units above 255 skip the table lookup and are compared
unchanged. The loop stops at the first difference or when TMP2 reaches zero. */
static void do_caselesscmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Rewind STR_PTR to the start of the second sequence. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Preserve the three borrowed registers, then load the table address. */
OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Step both pointers back by one unit: the loads in the loop address
[pointer + IN_UCHARS(1)]. MOVU_UCHAR is presumably the load-with-pointer-update
form, so each load also advances its base register - TODO confirm. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

label = LABEL();
OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
#if PCRE2_CODE_UNIT_WIDTH != 8
/* The table is indexed by a byte value; larger units bypass the lookup. */
jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif
/* Leave the loop on the first difference. */
jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
/* Count down the remaining length and loop until it reaches zero. */
OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);

JUMPHERE(jump);
/* Undo the one-unit bias applied to STR_PTR before the loop. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* Restore the borrowed registers. */
OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
5262
5263 #undef LCC_TABLE
5264 #undef CHAR1
5265 #undef CHAR2
5266
5267 #if defined SUPPORT_UNICODE
5268
do_utf_caselesscmp(PCRE2_SPTR src1,jit_arguments * args,PCRE2_SPTR end1)5269 static PCRE2_SPTR SLJIT_CALL do_utf_caselesscmp(PCRE2_SPTR src1, jit_arguments *args, PCRE2_SPTR end1)
5270 {
5271 /* This function would be ineffective to do in JIT level. */
5272 sljit_u32 c1, c2;
5273 PCRE2_SPTR src2 = args->startchar_ptr;
5274 PCRE2_SPTR end2 = args->end;
5275 const ucd_record *ur;
5276 const sljit_u32 *pp;
5277
5278 while (src1 < end1)
5279 {
5280 if (src2 >= end2)
5281 return (PCRE2_SPTR)1;
5282 GETCHARINC(c1, src1);
5283 GETCHARINC(c2, src2);
5284 ur = GET_UCD(c2);
5285 if (c1 != c2 && c1 != c2 + ur->other_case)
5286 {
5287 pp = PRIV(ucd_caseless_sets) + ur->caseset;
5288 for (;;)
5289 {
5290 if (c1 < *pp) return NULL;
5291 if (c1 == *pp++) break;
5292 }
5293 }
5294 }
5295 return src2;
5296 }
5297
5298 #endif /* SUPPORT_UNICODE */
5299
/* Emits the code that compares one pattern character at cc against the
subject. In UTF mode all code units of a multi-unit character are handled in
one call (utflength). The subject is addressed at negative offsets from
STR_PTR: context->length is the number of bytes still to be compared and is
reduced by one code unit per iteration.

Arguments:
  common      the compiler state
  caseless    TRUE to accept the other case of the character as well
  cc          points to the first code unit of the pattern character
  context     running state shared by consecutive calls: remaining length,
              the register that receives the next load (sourcereg, -1 before
              the first load) and, when unaligned reads are available, the
              code units collected so far for a combined compare
  backtracks  receives the jumps taken when the subject does not match

Returns:      cc advanced past the code units that were consumed

Loads run one step ahead of compares: each iteration loads the next chunk into
context->sourcereg, then switches sourcereg to the other of TMP1/TMP2, which
holds the chunk loaded earlier, and compares that. */
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
    compare_context *context, jump_list **backtracks)
{
DEFINE_COMPILER;
unsigned int othercasebit = 0;
PCRE2_SPTR othercasechar = NULL;
#ifdef SUPPORT_UNICODE
int utflength;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  othercasebit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(othercasebit);
  /* Extracting bit difference info. The value packs the index of the code
  unit that differs between the two cases (upper bits) together with the
  differing bit (low 8 bits). */
#if PCRE2_CODE_UNIT_WIDTH == 8
  othercasechar = cc + (othercasebit >> 8);
  othercasebit &= 0xff;
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for PCRE2_CODE_UNIT_WIDTH == 32. */
  othercasechar = cc + (othercasebit >> 9);
  /* Bit 0x100 selects the upper byte of the code unit for the mask. */
  if ((othercasebit & 0x100) != 0)
    othercasebit = (othercasebit & 0xff) << 8;
  else
    othercasebit &= 0xff;
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  }

/* First call for this context: preload the first chunk into TMP1 and make
TMP2 the target of the next load. */
if (context->sourcereg == -1)
  {
#if PCRE2_CODE_UNIT_WIDTH == 8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UNICODE
utflength = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  utflength += GET_EXTRALEN(*cc);

/* With Unicode support the body below runs once per code unit of the
character; without it the body runs exactly once. */
do
  {
#endif

  context->length -= IN_UCHARS(1);
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)

  /* Unaligned read is supported. Code units are collected in context->c
  (expected value) and context->oc (case bit to OR into the subject) until a
  16-bit or 32-bit group is complete, then compared in one go. */
  if (othercasebit != 0 && othercasechar == cc)
    {
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
    context->oc.asuchars[context->ucharptr] = othercasebit;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  context->ucharptr++;

  /* Flush when a full group has been collected or nothing is left to read. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Load the next chunk (if any) before comparing the current one. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if PCRE2_CODE_UNIT_WIDTH == 8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
    /* Switch to the register that holds the chunk loaded earlier. */
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

    /* The case labels are numbers of collected code units: 4 bytes, 2 bytes
    and (8-bit mode only) a single byte. */
    switch(context->ucharptr)
      {
      case 4 / sizeof(PCRE2_UCHAR):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(PCRE2_UCHAR):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#if PCRE2_CODE_UNIT_WIDTH == 8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_ASSERT_STOP();
      break;
      }
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported or in 32 bit mode. One code unit is loaded
  and compared per iteration. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  /* Switch to the register that holds the code unit loaded earlier. */
  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

  if (othercasebit != 0 && othercasechar == cc)
    {
    /* Set the case bit in the subject unit so both cases compare equal. */
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));

#endif

  cc++;
#ifdef SUPPORT_UNICODE
  utflength--;
  }
while (utflength > 0);
#endif

return cc;
}
5452
5453 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
5454
/* Helpers for compile_xclass_matchingpath. The generated code keeps the
character (TMP1) and the character type (typereg) biased by a compile-time
offset, so that a range test becomes a single unsigned compare. These macros
emit the ADD/SUB that moves the register from the current bias to the
requested one (nothing is emitted when the bias is unchanged) and record the
new bias in typeoffset / charoffset.

Each macro expands to one statement (do ... while (0)), so it is safe to use
as the body of an unbraced if/else. The argument is evaluated more than once
and must not have side effects. */

#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
5474
5475 static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
5476
/* Emits the matching code for an extended character class.

Arguments:
  common      the compiler state
  cc          points to the class flags (XCL_NOT, XCL_MAP, XCL_HASPROP); they
              are followed by an optional 32-byte bitmap for characters below
              256 and then by a list of XCL_SINGLE / XCL_RANGE / XCL_PROP /
              XCL_NOTPROP items terminated by XCL_END
  backtracks  receives the jumps taken when the character is rejected

The function works in two passes over the item list. The first pass only
collects information: the smallest and largest character that can match (used
to limit the work of read_char_range) and, with Unicode support, whether the
character type, the script or the character itself is needed by property
items. The second pass emits the comparisons.

For a normal class a successful comparison jumps to the local "found" list,
which is bound at the end of the emitted code. For a negated class (XCL_NOT) a
successful comparison jumps to backtracks instead. The last comparison of a
normal class is inverted (invertcmp) so that its failure goes to backtracks and
its success simply falls through.

RETURN_ADDR is used as a scratch register here: it keeps a copy of the
character while TMP1 is reused, and later holds the character type when both
the type and the character are needed. */
static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
jump_list *found = NULL;
jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
struct sljit_jump *jump = NULL;
PCRE2_SPTR ccbegin;
int compares, invertcmp, numberofcmps;
#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
BOOL utf = common->utf;
#endif

#ifdef SUPPORT_UNICODE
BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
BOOL charsaved = FALSE;
int typereg = TMP1;
const sljit_u32 *other_cases;
sljit_uw typeoffset;
#endif

/* Scanning the necessary info. */
cc++;
ccbegin = cc;
compares = 0;

if (cc[-1] & XCL_MAP)
  {
  /* The bitmap can match any character below 256. */
  min = 0;
  cc += 32 / sizeof(PCRE2_UCHAR);
  }

while (*cc != XCL_END)
  {
  compares++;
  if (*cc == XCL_SINGLE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
    if (c < min) min = c;
#ifdef SUPPORT_UNICODE
    needschar = TRUE;
#endif
    }
  else if (*cc == XCL_RANGE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);
    if (c < min) min = c;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
#ifdef SUPPORT_UNICODE
    needschar = TRUE;
#endif
    }
#ifdef SUPPORT_UNICODE
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    cc++;
    if (*cc == PT_CLIST)
      {
      /* A caseless set is a short list of characters: only its members can
      widen the range. */
      other_cases = PRIV(ucd_caseless_sets) + cc[1];
      while (*other_cases != NOTACHAR)
        {
        if (*other_cases > max) max = *other_cases;
        if (*other_cases < min) min = *other_cases;
        other_cases++;
        }
      }
    else
      {
      /* Any other property may match anywhere in the character range. */
      max = READ_CHAR_MAX;
      min = 0;
      }

    switch(*cc)
      {
      case PT_ANY:
      /* A positive "any" item accepts every character, so the whole class is
      replaced by OP_ALLANY; a negated one is ignored here. */
      if (cc[-1] == XCL_PROP)
        {
        compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
        if (list == backtracks)
          add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
        return;
        }
      break;

      case PT_LAMP:
      case PT_GC:
      case PT_PC:
      case PT_ALNUM:
      needstype = TRUE;
      break;

      case PT_SC:
      needsscript = TRUE;
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      case PT_WORD:
      case PT_PXGRAPH:
      case PT_PXPRINT:
      case PT_PXPUNCT:
      needstype = TRUE;
      needschar = TRUE;
      break;

      case PT_CLIST:
      case PT_UCNC:
      needschar = TRUE;
      break;

      default:
      SLJIT_ASSERT_STOP();
      break;
      }
    cc += 2;
    }
#endif
  }
SLJIT_ASSERT(compares > 0);

/* We are not necessarily in UTF mode, even in 8 bit mode. */
cc = ccbegin;
read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);

if ((cc[-1] & XCL_HASPROP) == 0)
  {
  if ((cc[-1] & XCL_MAP) != 0)
    {
    /* Characters below 256 are decided by the bitmap alone; larger ones skip
    to the item list. */
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
    if (!check_class_ranges(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
      {
      /* Generic bitmap test: byte index is c >> 3, bit index is c & 7. */
      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
      OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
      add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
      }

    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(jump);

    cc += 32 / sizeof(PCRE2_UCHAR);
    }
  else
    {
    /* Quick reject (or quick accept for a negated class) of characters that
    are outside [min, max]. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
    add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
    }
  }
else if ((cc[-1] & XCL_MAP) != 0)
  {
  /* The bitmap test destroys TMP1, and the property items still need the
  character: keep a copy. */
  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
#ifdef SUPPORT_UNICODE
  charsaved = TRUE;
#endif
  if (!check_class_ranges(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
    {
#if PCRE2_CODE_UNIT_WIDTH == 8
    jump = NULL;
    if (common->utf)
#endif
      jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);

    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
    add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));

#if PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf)
#endif
      JUMPHERE(jump);
    }

  OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
  cc += 32 / sizeof(PCRE2_UCHAR);
  }

#ifdef SUPPORT_UNICODE
if (needstype || needsscript)
  {
  if (needschar && !charsaved)
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);

  /* Two-stage table lookup: TMP2 receives the index of the character's
  Unicode record. */
  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

  /* Before anything else, we deal with scripts. */
  if (needsscript)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);

    ccbegin = cc;

    /* Only the script items are compiled in this walk; everything else is
    skipped and handled by the main loop below. */
    while (*cc != XCL_END)
      {
      if (*cc == XCL_SINGLE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        }
      else if (*cc == XCL_RANGE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        GETCHARINCTEST(c, cc);
        }
      else
        {
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
        cc++;
        if (*cc == PT_SC)
          {
          compares--;
          invertcmp = (compares == 0 && list != backtracks);
          if (cc[-1] == XCL_NOTPROP)
            invertcmp ^= 0x1;
          jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
          add_jump(compiler, compares > 0 ? list : backtracks, jump);
          }
        cc += 2;
        }
      }

    cc = ccbegin;
    }

  if (needschar)
    {
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
    }

  if (needstype)
    {
    if (!needschar)
      {
      /* Only the type is needed: it can live in TMP1. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
      }
    else
      {
      /* TMP1 holds the character, so the type goes to RETURN_ADDR. */
      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
      typereg = RETURN_ADDR;
      }
    }
  }
#endif

/* Generating code. */
charoffset = 0;
numberofcmps = 0;
#ifdef SUPPORT_UNICODE
typeoffset = 0;
#endif

while (*cc != XCL_END)
  {
  compares--;
  invertcmp = (compares == 0 && list != backtracks);
  jump = NULL;

  if (*cc == XCL_SINGLE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);

    /* Up to four consecutive single/range tests are merged: their results
    are ORed together in TMP2 and a single jump is emitted for the group. */
    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
  else if (*cc == XCL_RANGE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);
    /* Bias TMP1 by the start of the range, so that the range test is one
    unsigned compare against (end - start). */
    SET_CHAR_OFFSET(c);
    GETCHARINCTEST(c, cc);

    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
#ifdef SUPPORT_UNICODE
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    if (*cc == XCL_NOTPROP)
      invertcmp ^= 0x1;
    cc++;
    switch(*cc)
      {
      case PT_ANY:
      if (!invertcmp)
        jump = JUMP(SLJIT_JUMP);
      break;

      case PT_LAMP:
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_GC:
      c = PRIV(ucp_typerange)[(int)cc[1] * 2];
      SET_TYPE_OFFSET(c);
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
      break;

      case PT_PC:
      jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
      break;

      case PT_SC:
      /* Script items were compiled (and counted) before this loop, so the
      decrement at the top of the loop is undone. */
      compares++;
      /* Do nothing. */
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      SET_CHAR_OFFSET(9);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

      SET_TYPE_OFFSET(ucp_Zl);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_WORD:
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
      /* Fall through. */

      case PT_ALNUM:
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
      /* PT_WORD arrives here with the underscore test already in TMP2, so
      its result is ORed in; PT_ALNUM starts a fresh result. */
      OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
      SET_TYPE_OFFSET(ucp_Nd);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_CLIST:
      other_cases = PRIV(ucd_caseless_sets) + cc[1];

      /* At least three characters are required.
         Otherwise this case would be handled by the normal code path. */
      SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
      SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);

      /* Optimizing character pairs, if their difference is power of 2. Two
      characters that differ in a single bit are tested together by setting
      that bit in (a copy of) the character and comparing with the larger
      one. TMP1 may be biased by charoffset; the bit trick needs the real
      character value, so the bias is removed in the copy first. */
      if (is_powerof2(other_cases[1] ^ other_cases[0]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
        else
          {
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
          }
        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
        other_cases += 2;
        }
      else if (is_powerof2(other_cases[2] ^ other_cases[1]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
        else
          {
          /* The mask must be the bit that separates entries 1 and 2 (the pair
          selected by the condition above), exactly as in the charoffset == 0
          path; any other mask makes the compare with other_cases[2] fail. */
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
          }
        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);

        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
        OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);

        other_cases += 3;
        }
      else
        {
        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
        }

      /* The remaining members are tested one by one; only the last OR sets
      the flags that the jump below reads. */
      while (*other_cases != NOTACHAR)
        {
        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
        }
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_UCNC:
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

      /* 0xa0..0xd7ff, tested relative to a bias of 0xa0. */
      SET_CHAR_OFFSET(0xa0);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
      /* Everything from 0xe000 upwards. */
      SET_CHAR_OFFSET(0);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_PXGRAPH:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      SET_CHAR_OFFSET(0x2066);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      /* TMP2 is non-zero for characters that are not graphic. */
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPRINT:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);

      /* Unlike PT_PXGRAPH, the Zs type is not excluded. */
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
      OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      SET_CHAR_OFFSET(0x2066);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      /* TMP2 is non-zero for characters that are not printable. */
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPUNCT:
      /* Symbols (Sc..So) count as punctuation only below 0x80 ... */
      SET_TYPE_OFFSET(ucp_Sc);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);

      SET_CHAR_OFFSET(0);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
      OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);

      /* ... while the punctuation types (Pc..Ps) always do. */
      SET_TYPE_OFFSET(ucp_Pc);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      default:
      SLJIT_ASSERT_STOP();
      break;
      }
    cc += 2;
    }
#endif

  /* While further items remain, a hit goes to "list"; the jump of the final
  item always leads to backtracks. */
  if (jump != NULL)
    add_jump(compiler, compares > 0 ? list : backtracks, jump);
  }

if (found != NULL)
  set_jumps(found, LABEL());
}
6022
6023 #undef SET_TYPE_OFFSET
6024 #undef SET_CHAR_OFFSET
6025
6026 #endif
6027
/* Generates the matching path of a simple assertion opcode, or of OP_REVERSE.
Every failure jump that is emitted is appended to *backtracks.

Arguments:
  common      the JIT compiler state
  type        the opcode to compile: OP_SOD, OP_SOM, OP_NOT_WORD_BOUNDARY,
              OP_WORD_BOUNDARY, OP_EODN, OP_EOD, OP_DOLL, OP_DOLLM, OP_CIRC,
              OP_CIRCM or OP_REVERSE
  cc          the bytecode position; only OP_REVERSE reads data from it
  backtracks  list collecting the jumps taken when the assertion fails

Returns:      cc + LINK_SIZE for OP_REVERSE, cc unchanged for every other
              opcode; any other value of type reaches SLJIT_ASSERT_STOP()
*/
static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int length;
struct sljit_jump *jump[4];
#ifdef SUPPORT_UNICODE
struct sljit_label *label;
#endif /* SUPPORT_UNICODE */

switch(type)
  {
  case OP_SOD:
  /* Fail unless STR_PTR equals the "begin" field of the arguments. */
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_SOM:
  /* Fail unless STR_PTR equals the "str" field of the arguments. */
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  /* Call the shared word boundary routine, then fail on its zero / non-zero
  result depending on which of the two opcodes is compiled. */
  add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_EODN:
  /* Requires rather complex checks. */
  /* At the end of the subject no newline check is needed. */
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed newline made of two code units. TMP2 = STR_PTR + 2 code units,
    TMP1 = the code unit at STR_PTR. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode == PCRE2_JIT_COMPLETE)
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    else
      {
      /* Exactly two code units remain: compare both of them below. */
      jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
      /* Fail when TMP2 is below STR_END or TMP1 is not the first newline
      code unit. */
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
      /* Only the first newline code unit is present before STR_END. */
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }
    /* Both code units must equal the two halves of common->newline. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else if (common->nltype == NLTYPE_FIXED)
    {
    /* Fixed newline of one code unit: it must be the last code unit. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
    }
  else
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    /* Anything other than CR is handled after jump[1]. */
    jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
    /* Greater: the CR is the last code unit, go to the common exit.
    Less: more than two code units remain, fail. */
    jump[2] = JUMP(SLJIT_GREATER);
    add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
    /* Equal. */
    /* Exactly two code units remain: the second one must be NL. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

    JUMPHERE(jump[1]);
    if (common->nltype == NLTYPE_ANYCRLF)
      {
      /* The only remaining candidate is NL as the last code unit. */
      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
      }
    else
      {
      /* Save STR_PTR, read one character, require that the read ended at
      STR_END and that the anynewline routine does not report zero, then
      restore STR_PTR. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
      read_char_range(common, common->nlmin, common->nlmax, TRUE);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
      add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
      }
    JUMPHERE(jump[2]);
    JUMPHERE(jump[3]);
    }
  JUMPHERE(jump[0]);
  check_partial(common, FALSE);
  return cc;

  case OP_EOD:
  /* Fail when STR_PTR is below STR_END. */
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
  check_partial(common, FALSE);
  return cc;

  case OP_DOLL:
  /* Fail when the PCRE2_NOTEOL bit is set in the options of the arguments. */
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP2(SLJIT_AND32 | SLJIT_SET_E, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));

  /* Without common->endonly the OP_EODN code is reused; with it only the
  end of the subject is accepted. */
  if (!common->endonly)
    compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    check_partial(common, FALSE);
    }
  return cc;

  case OP_DOLLM:
  /* Not at the end of the subject: continue with the newline test. */
  jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  /* At the end of the subject: fail only when PCRE2_NOTEOL is set. */
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP2(SLJIT_AND32 | SLJIT_SET_E, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  check_partial(common, FALSE);
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed newline made of two code units. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode == PCRE2_JIT_COMPLETE)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
    else
      {
      jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
      /* STR_PTR = STR_END - IN_UCHARS(1) */
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }

    /* Both code units must equal the two halves of common->newline. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else
    {
    peek_char(common, common->nlmax);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(jump[0]);
  return cc;

  case OP_CIRC:
  /* Fail when STR_PTR is above the "begin" field of the arguments, or when
  the PCRE2_NOTBOL bit is set in their options. */
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
  OP2(SLJIT_AND32 | SLJIT_SET_E, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  return cc;

  case OP_CIRCM:
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
  /* Above "begin": continue with the test of the preceding newline. */
  jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
  /* At "begin": fail only when PCRE2_NOTBOL is set. */
  OP2(SLJIT_AND32 | SLJIT_SET_E, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  /* Unless common->alt_circumflex is set, fail at the end of the subject. */
  if (!common->alt_circumflex)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* The two code units before STR_PTR must lie at or above "begin" (still
    in TMP1) and equal the two halves of common->newline. */
    OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else
    {
    /* Step back one character, read it again and test it as a newline. */
    skip_char_back(common);
    read_char_range(common, common->nlmin, common->nlmax, TRUE);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(jump[0]);
  return cc;

  case OP_REVERSE:
  /* The count is stored at cc; a zero count emits no code. */
  length = GET(cc, 0);
  if (length == 0)
    return cc + LINK_SIZE;
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
    /* Loop "length" times: fail when STR_PTR is not above "begin",
    otherwise step back one character. */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
    label = LABEL();
    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
    skip_char_back(common);
    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);
    }
  else
#endif
    {
    /* Move back "length" code units at once and fail when the result is
    below "begin". */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
    }
  check_start_used_ptr(common);
  return cc + LINK_SIZE;
  }
SLJIT_ASSERT_STOP();
return cc;
}
6248
/* Generates the matching path of one input-consuming item: a character type,
a literal character (plain, caseless or negated), a Unicode property,
OP_ANYNL, OP_EXTUNI, or a character class. Every failure jump that is emitted
is appended to *backtracks.

Arguments:
  common         the JIT compiler state
  type           the opcode to compile
  cc             points to the data that follows the opcode
  backtracks     list collecting the jumps taken when the item does not match
  check_str_ptr  when TRUE, detect_partial_match() is emitted before the
                 character is read (OP_CHAR / OP_CHARI may instead emit a
                 direct comparison against STR_END in PCRE2_JIT_COMPLETE mode)

Returns:         the bytecode position after the data of the item; any
                 unhandled value of type reaches SLJIT_ASSERT_STOP()
*/
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
{
DEFINE_COMPILER;
int length;
unsigned int c, oc, bit;
compare_context context;
struct sljit_jump *jump[3];
jump_list *end_list;
#ifdef SUPPORT_UNICODE
struct sljit_label *label;
PCRE2_UCHAR propdata[5];
#endif /* SUPPORT_UNICODE */

switch(type)
  {
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  /* Digits are usually 0-9, so it is worth to optimize them. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
    read_char7_type(common, type == OP_NOT_DIGIT);
  else
#endif
    read_char8_type(common, type == OP_NOT_DIGIT);
    /* Flip the starting bit in the negative case. */
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
  add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  /* Same scheme as the digit case, testing the ctype_space bit. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
    read_char7_type(common, type == OP_NOT_WHITESPACE);
  else
#endif
    read_char8_type(common, type == OP_NOT_WHITESPACE);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
  add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  /* Same scheme as the digit case, testing the ctype_word bit. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
    read_char7_type(common, type == OP_NOT_WORDCHAR);
  else
#endif
    read_char8_type(common, type == OP_NOT_WORDCHAR);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
  add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_ANY:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char_range(common, common->nlmin, common->nlmax, TRUE);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed newline made of two code units. A character that differs from
    the first code unit is accepted immediately. */
    jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
    /* The jumps collected in end_list skip the test of the second code unit. */
    end_list = NULL;
    if (common->mode != PCRE2_JIT_PARTIAL_HARD)
      add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
    else
      check_str_end(common, &end_list);

    /* Fail when the next code unit completes the newline sequence. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
    set_jumps(end_list, LABEL());
    JUMPHERE(jump[0]);
    }
  else
    check_newlinechar(common, common->nltype, backtracks, TRUE);
  return cc;

  case OP_ALLANY:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (common->utf)
    {
    /* Load the first code unit and step over it. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
#if PCRE2_CODE_UNIT_WIDTH == 8
    /* Code units of 0xc0 and above: advance further by the value found in
    utf8_table4 at index (code unit - 0xc0). */
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif PCRE2_CODE_UNIT_WIDTH == 16
    /* Code units of 0xd800 and above: TMP1 becomes 1 when
    (code unit & 0xfc00) == 0xd800 and 0 otherwise; twice that value is
    added to STR_PTR. */
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif
    JUMPHERE(jump[0]);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
    return cc;
    }
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

  case OP_ANYBYTE:
  /* Always advances by exactly one code unit. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

#ifdef SUPPORT_UNICODE
  case OP_NOTPROP:
  case OP_PROP:
  /* Build a one-item extended class in propdata from the two data units at
  cc and compile it with the extended class compiler. */
  propdata[0] = XCL_HASPROP;
  propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
  propdata[2] = cc[0];
  propdata[3] = cc[1];
  propdata[4] = XCL_END;
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, propdata, backtracks);
  return cc + 2;
#endif

  case OP_ANYNL:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
  /* Anything other than CR is tested by check_newlinechar() below. */
  jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  /* We don't need to handle soft partial matching case. */
  end_list = NULL;
  if (common->mode != PCRE2_JIT_PARTIAL_HARD)
    add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  else
    check_str_end(common, &end_list);
  /* After a CR: step over the next code unit only when it is NL. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  jump[2] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[0]);
  check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
  set_jumps(end_list, LABEL());
  JUMPHERE(jump[1]);
  JUMPHERE(jump[2]);
  return cc;

  case OP_NOT_HSPACE:
  case OP_HSPACE:
  /* Read a character, call the shared hspace routine and fail on its
  zero / non-zero result depending on the opcode. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
  add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_NOT_VSPACE:
  case OP_VSPACE:
  /* Read a character, call the shared vspace routine and fail on its
  zero / non-zero result depending on the opcode. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
  add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

#ifdef SUPPORT_UNICODE
  case OP_EXTUNI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  /* Read the first character and load the gbprop field of its record. */
  read_char(common);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
  /* Optimize register allocation: use a real register. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
  OP1(SLJIT_MOV_U8, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);

  /* Loop: STACK_TOP holds the gbprop value of the previous character, TMP3
  remembers the position before the character that is read next. */
  label = LABEL();
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  read_char(common);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);

  /* Load the ucp_gbtable entry of the previous value and test the bit
  selected by the new value; continue the loop while that bit is set. */
  OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
  OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  JUMPTO(SLJIT_NOT_ZERO, label);

  /* The last character read is not consumed: go back to its start. */
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  JUMPHERE(jump[0]);
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

  if (common->mode == PCRE2_JIT_PARTIAL_HARD)
    {
    jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
    /* Since we successfully read a char above, partial matching must occur. */
    check_partial(common, TRUE);
    JUMPHERE(jump[0]);
    }
  return cc;
#endif

  case OP_CHAR:
  case OP_CHARI:
  length = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
#endif
  /* In complete mode a character that is caseful, has no other case, or has
  a non-zero other case bit is compared as a fixed-length sequence of code
  units after a single comparison against STR_END. */
  if (common->mode == PCRE2_JIT_COMPLETE && check_str_ptr
      && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
    {
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

    context.length = IN_UCHARS(length);
    context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
    context.ucharptr = 0;
#endif
    return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
    }

  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
#endif
    c = *cc;

  /* Only one acceptable value: read a character and compare it with c. */
  if (type == OP_CHAR || !char_has_othercase(common, cc))
    {
    read_char_range(common, c, c, FALSE);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    return cc + length;
    }
  oc = char_othercase(common, c);
  read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
  bit = c ^ oc;
  /* When the two cases differ in a single bit, that bit is set in the
  character before one comparison; otherwise both values are compared. */
  if (is_powerof2(bit))
    {
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
    return cc + length;
    }
  jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
  JUMPHERE(jump[0]);
  return cc + length;

  case OP_NOT:
  case OP_NOTI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  length = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
#if PCRE2_CODE_UNIT_WIDTH == 8
    c = *cc;
    if (c < 128)
      {
      /* The excluded character is a single code unit, so only the first code
      unit of the subject character has to be compared. */
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
      if (type == OP_NOT || !char_has_othercase(common, cc))
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      else
        {
        /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
        OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
        }
      /* Skip the variable-length character. */
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
      JUMPHERE(jump[0]);
      return cc + 1;
      }
    else
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
      {
      GETCHARLEN(c, cc, length);
      }
    }
  else
#endif /* SUPPORT_UNICODE */
    c = *cc;

  /* Mirror image of the OP_CHAR / OP_CHARI code: here a character that
  equals c (or its other case) fails. */
  if (type == OP_NOT || !char_has_othercase(common, cc))
    {
    read_char_range(common, c, c, TRUE);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    }
  else
    {
    oc = char_othercase(common, c);
    read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
    bit = c ^ oc;
    if (is_powerof2(bit))
      {
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
      }
    else
      {
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
      }
    }
  return cc + length;

  case OP_CLASS:
  case OP_NCLASS:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
  read_char_range(common, 0, bit, type == OP_NCLASS);
#else
  read_char_range(common, 0, 255, type == OP_NCLASS);
#endif

  /* Nothing more to emit when check_class_ranges() reports that it handled
  the class. The returned position skips the 32-byte bitmap. */
  if (check_class_ranges(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
    return cc + 32 / sizeof(PCRE2_UCHAR);

  /* Characters above the bitmap limit fail for OP_CLASS and skip the bitmap
  test (through jump[0]) for OP_NCLASS. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  jump[0] = NULL;
  if (common->utf)
    {
    jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
    if (type == OP_CLASS)
      {
      add_jump(compiler, backtracks, jump[0]);
      jump[0] = NULL;
      }
    }
#elif PCRE2_CODE_UNIT_WIDTH != 8
  jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  if (type == OP_CLASS)
    {
    add_jump(compiler, backtracks, jump[0]);
    jump[0] = NULL;
    }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

  /* Bitmap test: load the byte at cc[TMP1 >> 3] and fail when the bit
  numbered (TMP1 & 7) is clear. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  if (jump[0] != NULL)
    JUMPHERE(jump[0]);
#endif
  return cc + 32 / sizeof(PCRE2_UCHAR);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  case OP_XCLASS:
  /* The class data starts LINK_SIZE units after cc; the value stored at cc
  is used to compute the returned position. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
  return cc + GET(cc, 0) - 1;
#endif
  }
SLJIT_ASSERT_STOP();
return cc;
}
6632
compile_charn_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,jump_list ** backtracks)6633 static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
6634 {
6635 /* This function consumes at least one input character. */
6636 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
6637 DEFINE_COMPILER;
6638 PCRE2_SPTR ccbegin = cc;
6639 compare_context context;
6640 int size;
6641
6642 context.length = 0;
6643 do
6644 {
6645 if (cc >= ccend)
6646 break;
6647
6648 if (*cc == OP_CHAR)
6649 {
6650 size = 1;
6651 #ifdef SUPPORT_UNICODE
6652 if (common->utf && HAS_EXTRALEN(cc[1]))
6653 size += GET_EXTRALEN(cc[1]);
6654 #endif
6655 }
6656 else if (*cc == OP_CHARI)
6657 {
6658 size = 1;
6659 #ifdef SUPPORT_UNICODE
6660 if (common->utf)
6661 {
6662 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6663 size = 0;
6664 else if (HAS_EXTRALEN(cc[1]))
6665 size += GET_EXTRALEN(cc[1]);
6666 }
6667 else
6668 #endif
6669 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6670 size = 0;
6671 }
6672 else
6673 size = 0;
6674
6675 cc += 1 + size;
6676 context.length += IN_UCHARS(size);
6677 }
6678 while (size > 0 && context.length <= 128);
6679
6680 cc = ccbegin;
6681 if (context.length > 0)
6682 {
6683 /* We have a fixed-length byte sequence. */
6684 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
6685 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6686
6687 context.sourcereg = -1;
6688 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6689 context.ucharptr = 0;
6690 #endif
6691 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
6692 return cc;
6693 }
6694
6695 /* A non-fixed length character will be checked if length == 0. */
6696 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
6697 }
6698
/* Forward declarations. */
6700 static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
6701 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
6702
/* Allocates a backtrack record of "size" bytes with sljit_alloc_memory(),
clears it, and pushes it on the list headed by parent->top. The record is left
in the local variable "backtrack", with its cc field set to "ccstart". When
the compiler reports an error after the allocation, the enclosing function
returns "error". The variables "compiler", "parent" and "backtrack" must be
in scope where the macro is used. Every use of the "size" argument is
parenthesized. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
6715
/* Variant of the backtrack push for functions that return void: allocates a
record of "size" bytes with sljit_alloc_memory(), clears it, sets its cc field
to "ccstart" and pushes it on the list headed by parent->top, leaving it in
the local variable "backtrack". When the compiler reports an error after the
allocation, the enclosing function returns. The variables "compiler",
"parent" and "backtrack" must be in scope where the macro is used. Every use
of the "size" argument is parenthesized. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
6728
/* Views the local variable "backtrack" as a pointer to the given record type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
6730
/* Emits the group selection code of a back reference by duplicated name
(OP_DNREF / OP_DNREFI). The name table entries of the reference are visited in
order; for each one the address of its OVECTOR pair is put into TMP2, and the
search stops at the first group whose start value differs from the value
stored in OVECTOR(1). When no earlier entry stopped the search, TMP2 is left
pointing at the pair of the last entry.

Arguments:
  common      the JIT compiler state
  cc          points to the OP_DNREF / OP_DNREFI opcode
  backtracks  when not NULL and common->unset_backref is not set, receives a
              jump that is taken if the last entry also equals OVECTOR(1)
*/
static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int remaining = GET2(cc, 1 + IMM2_SIZE);
PCRE2_SPTR entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
unsigned int ovec;
jump_list *selected = NULL;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

/* TMP1 holds the value every candidate start is compared with. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* All entries except the last one: leave the search as soon as one differs. */
for (remaining--; remaining > 0; remaining--)
  {
  ovec = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovec));
  add_jump(compiler, &selected, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec), TMP1, 0));
  entry += common->name_entry_size;
  }

/* Last entry: it is selected unconditionally, optionally with a failure
jump when it equals the value in TMP1. */
ovec = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovec));
if (backtracks != NULL && !common->unset_backref)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec), TMP1, 0));

set_jumps(selected, LABEL());
}
6760
/* Generates the matching path of one back reference (OP_REF, OP_REFI,
OP_DNREF or OP_DNREFI).

For OP_REF / OP_REFI the captured string is described by OVECTOR(offset) and
OVECTOR(offset + 1). For the duplicated name variants the address of the
selected OVECTOR pair must already be in TMP2 when this code runs (this is
what compile_dnref_search() leaves there).

Whether the comparison is caseless depends only on the opcode: OP_REFI and
OP_DNREFI are caseless, OP_REF and OP_DNREF are caseful. The duplicated name
opcodes therefore use the same comparison code as their numbered
counterparts, including the UTF-aware caseless comparison.

Arguments:
  common      the JIT compiler state
  cc          points to the back reference opcode
  backtracks  list collecting the jumps taken when the match fails
  withchecks  when TRUE, a test for an empty captured string is emitted and,
              for OP_REF / OP_REFI without common->unset_backref, a test for
              a group whose start equals OVECTOR(1)
  emptyfail   used only with withchecks: when TRUE an empty captured string
              is a failure, otherwise the comparison code is skipped for it
*/
static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
BOOL caseless = (*cc == OP_REFI || *cc == OP_DNREFI);
int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;

/* TMP1 = start of the captured string. */
if (ref)
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->unset_backref)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
else
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UNICODE
if (common->utf && caseless)
  {
  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
  /* TMP2 = end of the captured string. */
  if (ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  /* Empty captured string. */
  if (withchecks)
    jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);

  /* Needed to save important temporary registers. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
  /* The current subject position is handed over in the startchar_ptr field
  of the arguments. */
  OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), STR_PTR, 0);
  sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  /* Return values 0 and 1 are failures; in the partial matching modes the
  value 1 additionally goes through check_partial(). Any other value becomes
  the new STR_PTR. */
  if (common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
    nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* TMP2 = length of the captured string (end - start). */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  /* Empty captured string. */
  if (withchecks)
    jump = JUMP(SLJIT_ZERO);

  /* Advance STR_PTR over the expected length; going past STR_END is a
  failure in complete mode and a partial match candidate otherwise. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, partial);

  /* The comparison routine is chosen from the case sensitivity of the
  opcode; a non-zero TMP2 after the call is a failure. */
  add_jump(compiler, caseless ? &common->caselesscmp : &common->casefulcmp, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    nopartial = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* TMP2 -= STR_END - STR_PTR */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    /* Nothing is left to compare when the reduced length is zero. */
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, caseless ? &common->caselesscmp : &common->casefulcmp, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }

/* Resolve the jump taken for an empty captured string. */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
}
6856
/* Generates the matching path of a back reference which is followed by a
repeat quantifier (OP_CRSTAR ... OP_CRMINRANGE).

Arguments:
  common   compiler state
  cc       points to the reference opcode. OP_REF / OP_REFI read the group
           boundaries directly from OVECTOR; every other opcode locates them
           through compile_dnref_search()
  parent   backtrack node which receives the new ref_iterator_backtrack

Returns: the byte code position after the quantifier. NULL is the value
         handed to PUSH_BACKTRACK for its failure case. */
static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
backtrack_common *backtrack;
PCRE2_UCHAR type;
int offset = 0;
struct sljit_label *label;
struct sljit_jump *zerolength;
struct sljit_jump *jump = NULL;
PCRE2_SPTR ccbegin = cc;
int min = 0, max = 0;
BOOL minimize;

PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);

/* The group number is doubled, because OVECTOR(offset) and OVECTOR(offset + 1)
are used as a pair below. For the other opcodes the quantifier is found one
IMM2 further, so cc is advanced to let both cases share the same offsets. */
if (ref)
  offset = GET2(cc, 1) << 1;
else
  cc += IMM2_SIZE;
type = cc[1 + IMM2_SIZE];

/* OP_CRSTAR is checked to be even; the lowest bit of the opcode is then used
to tell the minimizing variants apart. */
SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
minimize = (type & 0x1) != 0;
/* Decode the repeat limits and step over the quantifier. A max of zero is
treated as "no upper limit" by the code below. */
switch(type)
  {
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  min = 0;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  min = 1;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  min = 0;
  max = 1;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  min = GET2(cc, 1 + IMM2_SIZE + 1);
  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
  break;
  default:
  SLJIT_ASSERT_STOP();
  break;
  }

/* Maximizing repeat: match as many copies as allowed, pushing one STR_PTR
per optional iteration. */
if (!minimize)
  {
  if (min == 0)
    {
    allocate_stack(common, 2);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
    /* Temporary release of STR_PTR. */
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    /* Handles both the invalid and the empty case. Since the minimum repeat
    is zero, the invalid case is basically the same as the empty case. */
    if (ref)
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    else
      {
      compile_dnref_search(common, ccbegin, NULL);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      /* TMP2 is kept in POSSESSIVE1, it is reloaded at the loop label. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    /* Restore if not zero length. */
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    }
  else
    {
    allocate_stack(common, 1);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    if (ref)
      {
      /* At least one copy is required, so the match fails when the group
      start equals OVECTOR(1) (presumably the "unset" marker - confirm). */
      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
      }
    else
      {
      compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    }

  /* POSSESSIVE0 is the iteration counter; it is only needed when a limit
  greater than one has to be checked. */
  if (min > 1 || max > 1)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);

  /* Start of the repeat loop. */
  label = LABEL();
  if (!ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);

  if (min > 1 || max > 1)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
    /* Below the minimum: repeat without pushing anything. */
    if (min > 1)
      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
    if (max > 1)
      {
      /* Leave the loop at the maximum, otherwise push STR_PTR and repeat. */
      jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, label);
      JUMPHERE(jump);
      }
    }

  if (max == 0)
    {
    /* Includes min > 1 case as well. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, label);
    }

  JUMPHERE(zerolength);
  BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();

  count_match(common);
  return cc;
  }

/* Minimizing repeat. Slots used below: STACK(0) is set to zero or STR_PTR,
STACK(1) is the iteration counter, and STACK(2) (only allocated when !ref)
keeps the TMP2 value set by compile_dnref_search(). */
allocate_stack(common, ref ? 2 : 3);
if (ref)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
if (type != OP_CRMINSTAR)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

if (min == 0)
  {
  /* Handles both the invalid and the empty case. Since the minimum repeat
  is zero, the invalid case is basically the same as the empty case. */
  if (ref)
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    {
    compile_dnref_search(common, ccbegin, NULL);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  /* Length is non-zero, we can match real repeats. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  /* With a zero minimum the first attempt skips the reference entirely. */
  jump = JUMP(SLJIT_JUMP);
  }
else
  {
  if (ref)
    {
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    }
  else
    {
    compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  }

/* This label is recorded in the backtrack descriptor; it is also the target
of the "minimum not reached yet" jump below. */
BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
if (max > 0)
  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));

if (!ref)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

if (min > 1)
  {
  /* Count the iteration and keep repeating until the minimum is reached. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
  }
else if (max > 0)
  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);

if (jump != NULL)
  JUMPHERE(jump);
JUMPHERE(zerolength);

count_match(common);
return cc;
}
7063
compile_recurse_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)7064 static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
7065 {
7066 DEFINE_COMPILER;
7067 backtrack_common *backtrack;
7068 recurse_entry *entry = common->entries;
7069 recurse_entry *prev = NULL;
7070 sljit_sw start = GET(cc, 1);
7071 PCRE2_SPTR start_cc;
7072 BOOL needs_control_head;
7073
7074 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
7075
7076 /* Inlining simple patterns. */
7077 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
7078 {
7079 start_cc = common->start + start;
7080 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
7081 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
7082 return cc + 1 + LINK_SIZE;
7083 }
7084
7085 while (entry != NULL)
7086 {
7087 if (entry->start == start)
7088 break;
7089 prev = entry;
7090 entry = entry->next;
7091 }
7092
7093 if (entry == NULL)
7094 {
7095 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
7096 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7097 return NULL;
7098 entry->next = NULL;
7099 entry->entry = NULL;
7100 entry->calls = NULL;
7101 entry->start = start;
7102
7103 if (prev != NULL)
7104 prev->next = entry;
7105 else
7106 common->entries = entry;
7107 }
7108
7109 if (common->has_set_som && common->mark_ptr != 0)
7110 {
7111 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
7112 allocate_stack(common, 2);
7113 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
7114 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7115 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
7116 }
7117 else if (common->has_set_som || common->mark_ptr != 0)
7118 {
7119 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
7120 allocate_stack(common, 1);
7121 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7122 }
7123
7124 if (entry->entry == NULL)
7125 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
7126 else
7127 JUMPTO(SLJIT_FAST_CALL, entry->entry);
7128 /* Leave if the match is failed. */
7129 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
7130 return cc + 1 + LINK_SIZE;
7131 }
7132
do_callout(struct jit_arguments * arguments,pcre2_callout_block * callout_block,PCRE2_SPTR * jit_ovector)7133 static int SLJIT_CALL do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
7134 {
7135 PCRE2_SPTR begin = arguments->begin;
7136 PCRE2_SIZE *ovector = arguments->match_data->ovector;
7137 sljit_u32 oveccount = arguments->oveccount;
7138 sljit_u32 i;
7139
7140 if (arguments->callout == NULL)
7141 return 0;
7142
7143 callout_block->version = 1;
7144
7145 /* Offsets in subject. */
7146 callout_block->subject_length = arguments->end - arguments->begin;
7147 callout_block->start_match = (PCRE2_SPTR)callout_block->subject - arguments->begin;
7148 callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - arguments->begin;
7149 callout_block->subject = begin;
7150
7151 /* Convert and copy the JIT offset vector to the ovector array. */
7152 callout_block->capture_top = 0;
7153 callout_block->offset_vector = ovector;
7154 for (i = 2; i < oveccount; i += 2)
7155 {
7156 ovector[i] = jit_ovector[i] - begin;
7157 ovector[i + 1] = jit_ovector[i + 1] - begin;
7158 if (jit_ovector[i] >= begin)
7159 callout_block->capture_top = i;
7160 }
7161
7162 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
7163 ovector[0] = PCRE2_UNSET;
7164 ovector[1] = PCRE2_UNSET;
7165 return (arguments->callout)(callout_block, arguments->callout_data);
7166 }
7167
/* Size of pcre2_callout_block in bytes, rounded up to a multiple of 8. */
#define CALLOUT_ARG_SIZE \
    (((int)sizeof(pcre2_callout_block) + 7) & ~7)

/* Displacement of the 'arg' member of a pcre2_callout_block which starts
CALLOUT_ARG_SIZE bytes below the base address, i.e. -CALLOUT_ARG_SIZE plus
the SLJIT_OFFSETOF value of the member. */
#define CALLOUT_ARG_OFFSET(arg) \
    (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(pcre2_callout_block, arg))
7174
compile_callout_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)7175 static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
7176 {
7177 DEFINE_COMPILER;
7178 backtrack_common *backtrack;
7179 sljit_s32 mov_opcode;
7180 unsigned int callout_length = (*cc == OP_CALLOUT)
7181 ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
7182 sljit_sw value1;
7183 sljit_sw value2;
7184 sljit_sw value3;
7185
7186 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
7187
7188 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
7189
7190 SLJIT_ASSERT(common->capture_last_ptr != 0);
7191 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
7192 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7193 value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
7194 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
7195 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
7196
7197 /* These pointer sized fields temporarly stores internal variables. */
7198 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
7199 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
7200 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
7201
7202 if (common->mark_ptr != 0)
7203 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
7204 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
7205 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
7206 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
7207
7208 if (*cc == OP_CALLOUT)
7209 {
7210 value1 = 0;
7211 value2 = 0;
7212 value3 = 0;
7213 }
7214 else
7215 {
7216 value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
7217 value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
7218 value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
7219 }
7220
7221 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
7222 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
7223 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
7224 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
7225
7226 /* Needed to save important temporary registers. */
7227 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
7228 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
7229 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
7230 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
7231 OP1(SLJIT_MOV_S32, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
7232 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7233 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
7234
7235 /* Check return value. */
7236 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
7237 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
7238 if (common->forced_quit_label == NULL)
7239 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS));
7240 else
7241 JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label);
7242 return cc + callout_length;
7243 }
7244
/* The callout argument helpers are not needed beyond this point. */
#undef CALLOUT_ARG_SIZE
#undef CALLOUT_ARG_OFFSET
7247
assert_needs_str_ptr_saving(PCRE2_SPTR cc)7248 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
7249 {
7250 while (TRUE)
7251 {
7252 switch (*cc)
7253 {
7254 case OP_CALLOUT_STR:
7255 cc += GET(cc, 1 + 2*LINK_SIZE);
7256 break;
7257
7258 case OP_NOT_WORD_BOUNDARY:
7259 case OP_WORD_BOUNDARY:
7260 case OP_CIRC:
7261 case OP_CIRCM:
7262 case OP_DOLL:
7263 case OP_DOLLM:
7264 case OP_CALLOUT:
7265 case OP_ALT:
7266 cc += PRIV(OP_lengths)[*cc];
7267 break;
7268
7269 case OP_KET:
7270 return FALSE;
7271
7272 default:
7273 return TRUE;
7274 }
7275 }
7276 }
7277
/* Generates the matching path of an assertion (OP_ASSERT, OP_ASSERT_NOT,
OP_ASSERTBACK or OP_ASSERTBACK_NOT), which may be preceded by OP_BRAZERO or
OP_BRAMINZERO.

Arguments:
  common       compiler state; the accept, quit, positive assert and then_trap
               related members changed here are put back before every return
  cc           start of the assertion, or of the BRAZERO / BRAMINZERO item
               in front of it
  backtrack    assert backtrack descriptor; framesize, private_data_ptr and
               (for the BRAZERO forms) matchingpath are set here
  conditional  when TRUE, the failure jumps are collected in
               backtrack->condfailed instead of the normal backtrack list

Returns: cc + 1 + LINK_SIZE measured from the end of the last alternative,
         or NULL when the compiler reports an error. */
static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
{
DEFINE_COMPILER;
int framesize;
int extrasize;
BOOL needs_control_head;
int private_data_ptr;
backtrack_common altbacktrack;
PCRE2_SPTR ccbegin;
PCRE2_UCHAR opcode;
PCRE2_UCHAR bra = OP_BRA;
jump_list *tmp = NULL;
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
jump_list **found;
/* Saving previous accept variables. */
BOOL save_local_exit = common->local_exit;
BOOL save_positive_assert = common->positive_assert;
then_trap_backtrack *save_then_trap = common->then_trap;
struct sljit_label *save_quit_label = common->quit_label;
struct sljit_label *save_accept_label = common->accept_label;
jump_list *save_quit = common->quit;
jump_list *save_positive_assert_quit = common->positive_assert_quit;
jump_list *save_accept = common->accept;
struct sljit_jump *jump;
struct sljit_jump *brajump = NULL;

/* Assert captures then. */
common->then_trap = NULL;

/* An optional BRAZERO / BRAMINZERO prefix is remembered in 'bra' and
skipped; it is not allowed for conditional assertions. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  SLJIT_ASSERT(!conditional);
  bra = *cc;
  cc++;
  }
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
backtrack->framesize = framesize;
backtrack->private_data_ptr = private_data_ptr;
opcode = *cc;
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* 'found' receives the jumps taken when an alternative matches: positive
assertions gather them in the local 'tmp' list, for negative assertions a
match is a failure and goes straight to 'target'. */
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin / cc delimit the current alternative. */
ccbegin = cc;
cc += GET(cc, 1);

if (bra == OP_BRAMINZERO)
  {
  /* This is a braminzero backtrack path. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (framesize < 0)
  {
  /* No frame is stored. extrasize counts the slots pushed here: one for
  STR_PTR (dropped when assert_needs_str_ptr_saving() says it is not needed)
  and one for the control head. */
  extrasize = 1;
  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
    extrasize = 0;

  if (needs_control_head)
    extrasize++;

  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  if (extrasize > 0)
    allocate_stack(common, extrasize);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);

  if (extrasize > 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    SLJIT_ASSERT(extrasize == 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
    }
  }
else
  {
  /* A frame is stored. The extra slots are STR_PTR in STACK(0), followed by
  the control head (if needed) and the previous private_data_ptr value. */
  extrasize = needs_control_head ? 3 : 2;
  allocate_stack(common, framesize + extrasize);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    }
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);

  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
  }

memset(&altbacktrack, 0, sizeof(backtrack_common));
if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  /* Negative assert is stronger than positive assert. */
  common->local_exit = TRUE;
  common->quit_label = NULL;
  common->quit = NULL;
  common->positive_assert = FALSE;
  }
else
  common->positive_assert = TRUE;
common->positive_assert_quit = NULL;

/* One iteration per alternative of the assertion. */
while (1)
  {
  common->accept_label = NULL;
  common->accept = NULL;
  altbacktrack.top = NULL;
  altbacktrack.topbacktracks = NULL;

  /* Alternatives after the first one restart from the saved STR_PTR. */
  if (*ccbegin == OP_ALT && extrasize > 0)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  altbacktrack.cc = ccbegin;
  compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Put the saved compiler state back before giving up. */
    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
      {
      common->local_exit = save_local_exit;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->positive_assert = save_positive_assert;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assert_quit = save_positive_assert_quit;
    common->accept = save_accept;
    return NULL;
    }
  /* The alternative has matched: accept jumps collected inside it land here. */
  common->accept_label = LABEL();
  if (common->accept != NULL)
    set_jumps(common->accept, common->accept_label);

  /* Reset stack. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    else if (extrasize > 0)
      free_stack(common, extrasize);

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
    }
  else
    {
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
      }
    else
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      }
    }

  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (conditional)
      {
      if (extrasize > 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
      }
    else if (bra == OP_BRAZERO)
      {
      if (framesize < 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
        }
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (framesize >= 0)
      {
      /* For OP_BRA and OP_BRAMINZERO. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
      }
    }
  /* Leave through the "alternative matched" list selected above. */
  add_jump(compiler, found, JUMP(SLJIT_JUMP));

  /* The backtracking path of the alternative follows; when it runs out of
  choices, execution continues with the next alternative (or after the loop). */
  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Put the saved compiler state back before giving up. */
    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
      {
      common->local_exit = save_local_exit;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->positive_assert = save_positive_assert;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assert_quit = save_positive_assert_quit;
    common->accept = save_accept;
    return NULL;
    }
  set_jumps(altbacktrack.topbacktracks, LABEL());

  if (*cc != OP_ALT)
    break;

  /* Advance to the next alternative. */
  ccbegin = cc;
  cc += GET(cc, 1);
  }

if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  SLJIT_ASSERT(common->positive_assert_quit == NULL);
  /* Makes the check less complicated below. */
  common->positive_assert_quit = common->quit;
  }

/* None of them matched. */
if (common->positive_assert_quit != NULL)
  {
  /* Jumps collected in positive_assert_quit land here and get the stack top
  re-established; the normal fall-through path jumps over this code. */
  jump = JUMP(SLJIT_JUMP);
  set_jumps(common->positive_assert_quit, LABEL());
  SLJIT_ASSERT(framesize != no_stack);
  if (framesize < 0)
    OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
    }
  JUMPHERE(jump);
  }

/* The control head was saved in STACK(1) when it is in use. */
if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));

if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
  {
  /* The assertion has failed. */
  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  if (framesize < 0)
    {
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    /* TMP1 carries the previous private_data_ptr value across the release. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  /* For BRAZERO a failed assertion still continues on the matching path
  (the jump is bound below); otherwise it goes to the failure target. */
  jump = JUMP(SLJIT_JUMP);
  if (bra != OP_BRAZERO)
    add_jump(compiler, target, jump);

  /* The assertion is successful. */
  set_jumps(tmp, LABEL());
  if (framesize < 0)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));

    /* Keep the STR_PTR on the top of the stack. */
    if (bra == OP_BRAZERO)
      {
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      if (extrasize == 2)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else if (bra == OP_BRAMINZERO)
      {
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    }
  else
    {
    if (bra == OP_BRA)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
      }
    else
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
      if (extrasize == 2)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        if (bra == OP_BRAMINZERO)
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
        }
      else
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
        }
      }
    }

  if (bra == OP_BRAZERO)
    {
    /* Both the failed and the successful case continue from here. */
    backtrack->matchingpath = LABEL();
    SET_LABEL(jump, backtrack->matchingpath);
    }
  else if (bra == OP_BRAMINZERO)
    {
    /* backtrack->matchingpath is not assigned in this function for
    BRAMINZERO; it has to be provided by the caller. */
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    if (framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
      }
    set_jumps(backtrack->common.topbacktracks, LABEL());
    }
  }
else
  {
  /* AssertNot is successful. */
  if (framesize < 0)
    {
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

    if (bra != OP_BRA)
      {
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    /* TMP1 carries the previous private_data_ptr value across the release. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra != OP_BRA)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }

  if (bra == OP_BRAZERO)
    backtrack->matchingpath = LABEL();
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    }

  if (bra != OP_BRA)
    {
    /* With a zero repeat prefix a matching alternative (i.e. a failed
    negative assertion) just continues here, so the list is consumed. */
    SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
    set_jumps(backtrack->common.topbacktracks, LABEL());
    backtrack->common.topbacktracks = NULL;
    }
  }

/* Put the saved compiler state back. */
if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  common->local_exit = save_local_exit;
  common->quit_label = save_quit_label;
  common->quit = save_quit;
  }
common->positive_assert = save_positive_assert;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
common->positive_assert_quit = save_positive_assert_quit;
common->accept = save_accept;
return cc + 1 + LINK_SIZE;
}
7699
match_once_common(compiler_common * common,PCRE2_UCHAR ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)7700 static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
7701 {
7702 DEFINE_COMPILER;
7703 int stacksize;
7704
7705 if (framesize < 0)
7706 {
7707 if (framesize == no_frame)
7708 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7709 else
7710 {
7711 stacksize = needs_control_head ? 1 : 0;
7712 if (ket != OP_KET || has_alternatives)
7713 stacksize++;
7714
7715 if (stacksize > 0)
7716 free_stack(common, stacksize);
7717 }
7718
7719 if (needs_control_head)
7720 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
7721
7722 /* TMP2 which is set here used by OP_KETRMAX below. */
7723 if (ket == OP_KETRMAX)
7724 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
7725 else if (ket == OP_KETRMIN)
7726 {
7727 /* Move the STR_PTR to the private_data_ptr. */
7728 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
7729 }
7730 }
7731 else
7732 {
7733 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
7734 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
7735 if (needs_control_head)
7736 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
7737
7738 if (ket == OP_KETRMAX)
7739 {
7740 /* TMP2 which is set here used by OP_KETRMAX below. */
7741 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7742 }
7743 }
7744 if (needs_control_head)
7745 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
7746 }
7747
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)7748 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
7749 {
7750 DEFINE_COMPILER;
7751
7752 if (common->capture_last_ptr != 0)
7753 {
7754 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
7755 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
7756 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7757 stacksize++;
7758 }
7759 if (common->optimized_cbracket[offset >> 1] == 0)
7760 {
7761 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
7762 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7763 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7764 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7765 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
7766 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
7767 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
7768 stacksize += 2;
7769 }
7770 return stacksize;
7771 }
7772
7773 /*
7774 Handling bracketed expressions is probably the most complex part.
7775
7776 Stack layout naming characters:
7777 S - Push the current STR_PTR
7778 0 - Push a 0 (NULL)
7779 A - Push the current STR_PTR. Needed for restoring the STR_PTR
7780 before the next alternative. Not pushed if there are no alternatives.
7781 M - Any values pushed by the current alternative. Can be empty, or anything.
7782 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
7783 L - Push the previous local (pointed by localptr) to the stack
7784 () - optional values stored on the stack
7785 ()* - optional, can be stored multiple times
7786
7787 The following list shows the regular expression templates, their PCRE byte codes
7788 and stack layout supported by pcre-sljit.
7789
7790 (?:) OP_BRA | OP_KET A M
7791 () OP_CBRA | OP_KET C M
7792 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
7793 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
7794 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
7795 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
7796 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
7797 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
7798 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
7799 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
7800 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
7801 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
7802 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
7803 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
7804 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
7805 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
7806 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
7807 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
7808 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
7809 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
7810 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
7811 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
7812
7813
7814 Stack layout naming characters:
7815 A - Push the alternative index (starting from 0) on the stack.
7816 Not pushed if there are no alternatives.
7817 M - Any values pushed by the current alternative. Can be empty, or anything.
7818
7819 The next list shows the possible content of a bracket:
7820 (|) OP_*BRA | OP_ALT ... M A
7821 (?()|) OP_*COND | OP_ALT M A
7822 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
7823 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
7824 Or nothing, if trace is unnecessary
7825 */
7826
/* Emits the matching-path code for one bracketed group: OP_BRA, OP_CBRA,
OP_SBRA, OP_SCBRA, OP_ONCE, OP_ONCE_NC, OP_COND or OP_SCOND, optionally
preceded by OP_BRAZERO / OP_BRAMINZERO.

common - shared compiler state
cc     - points at the first opcode of the group (the BRAZERO / BRAMINZERO
         prefix when there is one)
parent - enclosing backtrack record; only referenced directly for
         OP_BRAMINZERO, where it is cast to braminzero_backtrack to obtain
         the label to jump to (presumably also consumed by PUSH_BACKTRACK)

Only the first alternative is compiled here (through compile_matchingpath);
the remaining alternatives are skipped over. The values the backtracking code
needs (private_data_ptr, framesize, labels, condfailed / assert data) are
stored in the bracket_backtrack accessed through BACKTRACK_AS().

Returns the position after the closing ket plus repeat_length, or NULL when
sljit reports a compiler error. */
compile_bracket_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)7827 static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
7828 {
7829 DEFINE_COMPILER;
7830 backtrack_common *backtrack;
7831 PCRE2_UCHAR opcode;
7832 int private_data_ptr = 0;
7833 int offset = 0;
7834 int i, stacksize;
7835 int repeat_ptr = 0, repeat_length = 0;
7836 int repeat_type = 0, repeat_count = 0;
7837 PCRE2_SPTR ccbegin;
7838 PCRE2_SPTR matchingpath;
7839 PCRE2_SPTR slot;
7840 PCRE2_UCHAR bra = OP_BRA;
7841 PCRE2_UCHAR ket;
7842 assert_backtrack *assert;
7843 BOOL has_alternatives;
7844 BOOL needs_control_head = FALSE;
7845 struct sljit_jump *jump;
7846 struct sljit_jump *skip;
7847 struct sljit_label *rmax_label = NULL;
7848 struct sljit_jump *braminzero = NULL;
7849
7850 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
7851
/* bra remembers the zero-repeat prefix (OP_BRA means "no prefix"). */
7852 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
7853 {
7854 bra = *cc;
7855 cc++;
/* NOTE(review): redundant store; opcode is assigned unconditionally just below. */
7856 opcode = *cc;
7857 }
7858
7859 opcode = *cc;
7860 ccbegin = cc;
/* matchingpath temporarily points at the closing ket opcode of the group. */
7861 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
7862 ket = *matchingpath;
/* A plain OP_KET with private data describes a counted repeat. OP_UPTO and
OP_MINUPTO repeats are then handled like OP_KETRMAX / OP_KETRMIN. */
7863 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
7864 {
7865 repeat_ptr = PRIVATE_DATA(matchingpath);
7866 repeat_length = PRIVATE_DATA(matchingpath + 1);
7867 repeat_type = PRIVATE_DATA(matchingpath + 2);
7868 repeat_count = PRIVATE_DATA(matchingpath + 3);
7869 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
7870 if (repeat_type == OP_UPTO)
7871 ket = OP_KETRMAX;
7872 if (repeat_type == OP_MINUPTO)
7873 ket = OP_KETRMIN;
7874 }
7875
/* From here on matchingpath is the start of the code to compile for the first
alternative; cc is advanced by the link value stored after the opcode, so it
is expected to point at an OP_ALT or at the closing ket. */
7876 matchingpath = ccbegin + 1 + LINK_SIZE;
7877 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
7878 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
7879 cc += GET(cc, 1);
7880
7881 has_alternatives = *cc == OP_ALT;
/* For conditional groups, conditions in the OP_RREF..OP_TRUE range and
OP_FAIL are resolved at compile time below, so they never need the
alternative bookkeeping. */
7882 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
7883 {
7884 SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
7885 compile_time_checks_must_be_grouped_together);
7886 has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
7887 }
7888
/* Normalise the opcode: a repeated OP_COND is treated as OP_SCOND and
OP_ONCE_NC as OP_ONCE. */
7889 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
7890 opcode = OP_SCOND;
7891 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
7892 opcode = OP_ONCE;
7893
7894 if (opcode == OP_CBRA || opcode == OP_SCBRA)
7895 {
7896 /* Capturing brackets has a pre-allocated space. */
7897 offset = GET2(ccbegin, 1 + LINK_SIZE);
/* offset is the bracket number while optimized_cbracket[] is indexed, and is
doubled afterwards so that it indexes the OVECTOR pair. */
7898 if (common->optimized_cbracket[offset] == 0)
7899 {
7900 private_data_ptr = OVECTOR_PRIV(offset);
7901 offset <<= 1;
7902 }
7903 else
7904 {
7905 offset <<= 1;
7906 private_data_ptr = OVECTOR(offset);
7907 }
7908 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
7909 matchingpath += IMM2_SIZE;
7910 }
7911 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
7912 {
7913 /* Other brackets simply allocate the next entry. */
7914 private_data_ptr = PRIVATE_DATA(ccbegin);
7915 SLJIT_ASSERT(private_data_ptr != 0);
7916 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
7917 if (opcode == OP_ONCE)
7918 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
7919 }
7920
7921 /* Instructions before the first alternative. */
/* Two passes: first count the stack slots and allocate them in one call,
then fill them in the same order. */
7922 stacksize = 0;
7923 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
7924 stacksize++;
7925 if (bra == OP_BRAZERO)
7926 stacksize++;
7927
7928 if (stacksize > 0)
7929 allocate_stack(common, stacksize);
7930
7931 stacksize = 0;
7932 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
7933 {
7934 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
7935 stacksize++;
7936 }
7937
7938 if (bra == OP_BRAZERO)
7939 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7940
7941 if (bra == OP_BRAMINZERO)
7942 {
7943 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
7944 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7945 if (ket != OP_KETRMIN)
7946 {
7947 free_stack(common, 1);
7948 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7949 }
7950 else
7951 {
7952 if (opcode == OP_ONCE || opcode >= OP_SBRA)
7953 {
7954 jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7955 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
7956 /* Nothing stored during the first run. */
7957 skip = JUMP(SLJIT_JUMP);
7958 JUMPHERE(jump);
7959 /* Checking zero-length iteration. */
7960 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
7961 {
7962 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
7963 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7964 }
7965 else
7966 {
7967 /* Except when the whole stack frame must be saved. */
7968 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7969 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
7970 }
7971 JUMPHERE(skip);
7972 }
7973 else
7974 {
7975 jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7976 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
7977 JUMPHERE(jump);
7978 }
7979 }
7980 }
7981
/* Counted repeats: load the counter; for OP_EXACT the loop label is placed
right after the counter initialisation. */
7982 if (repeat_type != 0)
7983 {
7984 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
7985 if (repeat_type == OP_EXACT)
7986 rmax_label = LABEL();
7987 }
7988
7989 if (ket == OP_KETRMIN)
7990 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
7991
7992 if (ket == OP_KETRMAX)
7993 {
7994 rmax_label = LABEL();
7995 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
7996 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
7997 }
7998
7999 /* Handling capturing brackets and alternatives. */
8000 if (opcode == OP_ONCE)
8001 {
8002 stacksize = 0;
/* TMP2 carries the control head until it is stored on the stack below. */
8003 if (needs_control_head)
8004 {
8005 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8006 stacksize++;
8007 }
8008
8009 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
8010 {
8011 /* Neither capturing brackets nor recursions are found in the block. */
8012 if (ket == OP_KETRMIN)
8013 {
8014 stacksize += 2;
8015 if (!needs_control_head)
8016 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8017 }
8018 else
8019 {
8020 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
8021 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
8022 if (ket == OP_KETRMAX || has_alternatives)
8023 stacksize++;
8024 }
8025
8026 if (stacksize > 0)
8027 allocate_stack(common, stacksize);
8028
8029 stacksize = 0;
8030 if (needs_control_head)
8031 {
8032 stacksize++;
8033 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8034 }
8035
8036 if (ket == OP_KETRMIN)
8037 {
8038 if (needs_control_head)
8039 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8040 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8041 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
8042 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
8043 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
8044 }
8045 else if (ket == OP_KETRMAX || has_alternatives)
8046 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8047 }
8048 else
8049 {
/* framesize >= 0: a frame is built with init_frame() on top of the optional
control head and STR_PTR slots, and private_data_ptr is redirected to it. */
8050 if (ket != OP_KET || has_alternatives)
8051 stacksize++;
8052
8053 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
8054 allocate_stack(common, stacksize);
8055
8056 if (needs_control_head)
8057 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8058
8059 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8060 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
8061
8062 stacksize = needs_control_head ? 1 : 0;
8063 if (ket != OP_KET || has_alternatives)
8064 {
8065 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8066 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
8067 stacksize++;
8068 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
8069 }
8070 else
8071 {
8072 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
8073 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
8074 }
8075 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
8076 }
8077 }
8078 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
8079 {
8080 /* Saving the previous values. */
8081 if (common->optimized_cbracket[offset >> 1] != 0)
8082 {
8083 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
8084 allocate_stack(common, 2);
8085 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8086 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
8087 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
8088 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
8089 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
8090 }
8091 else
8092 {
8093 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8094 allocate_stack(common, 1);
8095 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
8096 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8097 }
8098 }
8099 else if (opcode == OP_SBRA || opcode == OP_SCOND)
8100 {
8101 /* Saving the previous value. */
8102 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8103 allocate_stack(common, 1);
8104 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
8105 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8106 }
8107 else if (has_alternatives)
8108 {
8109 /* Pushing the starting string pointer. */
8110 allocate_stack(common, 1);
8111 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8112 }
8113
8114 /* Generating code for the first alternative. */
8115 if (opcode == OP_COND || opcode == OP_SCOND)
8116 {
/* OP_CREF: the condition fails when the referenced OVECTOR entry equals
OVECTOR(1). */
8117 if (*matchingpath == OP_CREF)
8118 {
8119 SLJIT_ASSERT(has_alternatives);
8120 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
8121 CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
8122 matchingpath += 1 + IMM2_SIZE;
8123 }
8124 else if (*matchingpath == OP_DNCREF)
8125 {
8126 SLJIT_ASSERT(has_alternatives);
8127
/* Walk i name table entries; the differences against OVECTOR(1) are OR-ed
together, so the result is zero (condition failed) only when every listed
entry equals OVECTOR(1). STR_PTR is used as scratch and restored from TMP3. */
8128 i = GET2(matchingpath, 1 + IMM2_SIZE);
8129 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
8130 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
8131 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
8132 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
8133 slot += common->name_entry_size;
8134 i--;
8135 while (i-- > 0)
8136 {
8137 OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
8138 OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0);
8139 slot += common->name_entry_size;
8140 }
8141 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
8142 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
8143 matchingpath += 1 + 2 * IMM2_SIZE;
8144 }
8145 else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
8146 {
8147 /* Never has other case. */
8148 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
8149 SLJIT_ASSERT(!has_alternatives);
8150
/* In this branch stacksize is reused as a compile-time truth value of the
condition: non-zero selects the first alternative, zero the "else" case. */
8151 if (*matchingpath == OP_TRUE)
8152 {
8153 stacksize = 1;
8154 matchingpath++;
8155 }
8156 else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
8157 stacksize = 0;
8158 else if (*matchingpath == OP_RREF)
8159 {
8160 stacksize = GET2(matchingpath, 1);
8161 if (common->currententry == NULL)
8162 stacksize = 0;
8163 else if (stacksize == RREF_ANY)
8164 stacksize = 1;
8165 else if (common->currententry->start == 0)
8166 stacksize = stacksize == 0;
8167 else
8168 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
8169
8170 if (stacksize != 0)
8171 matchingpath += 1 + IMM2_SIZE;
8172 }
8173 else
8174 {
/* Remaining opcode in the range (OP_DNRREF per the compile-time assert
above): search the name table entries for the current entry's number. */
8175 if (common->currententry == NULL || common->currententry->start == 0)
8176 stacksize = 0;
8177 else
8178 {
8179 stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
8180 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
8181 i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
8182 while (stacksize > 0)
8183 {
8184 if ((int)GET2(slot, 0) == i)
8185 break;
8186 slot += common->name_entry_size;
8187 stacksize--;
8188 }
8189 }
8190
8191 if (stacksize != 0)
8192 matchingpath += 1 + 2 * IMM2_SIZE;
8193 }
8194
8195 /* The stacksize == 0 is a common "else" case. */
8196 if (stacksize == 0)
8197 {
8198 if (*cc == OP_ALT)
8199 {
8200 matchingpath = cc + 1 + LINK_SIZE;
8201 cc += GET(cc, 1);
8202 }
8203 else
8204 matchingpath = cc;
8205 }
8206 }
8207 else
8208 {
8209 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
8210 /* Similar code as PUSH_BACKTRACK macro. */
8211 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
8212 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8213 return NULL;
8214 memset(assert, 0, sizeof(assert_backtrack));
8215 assert->common.cc = matchingpath;
8216 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
8217 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
8218 }
8219 }
8220
/* Compile the selected alternative: the code between matchingpath and cc. */
8221 compile_matchingpath(common, matchingpath, cc, backtrack);
8222 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8223 return NULL;
8224
8225 if (opcode == OP_ONCE)
8226 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
8227
/* Instructions after the alternative. Same two-pass scheme as before: count
the slots, allocate them once, then fill them in the same order. */
8228 stacksize = 0;
8229 if (repeat_type == OP_MINUPTO)
8230 {
8231 /* We need to preserve the counter. TMP2 will be used below. */
8232 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
8233 stacksize++;
8234 }
8235 if (ket != OP_KET || bra != OP_BRA)
8236 stacksize++;
8237 if (offset != 0)
8238 {
8239 if (common->capture_last_ptr != 0)
8240 stacksize++;
8241 if (common->optimized_cbracket[offset >> 1] == 0)
8242 stacksize += 2;
8243 }
8244 if (has_alternatives && opcode != OP_ONCE)
8245 stacksize++;
8246
8247 if (stacksize > 0)
8248 allocate_stack(common, stacksize);
8249
8250 stacksize = 0;
8251 if (repeat_type == OP_MINUPTO)
8252 {
8253 /* TMP2 was set above. */
8254 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
8255 stacksize++;
8256 }
8257
/* Repeating groups store the current STR_PTR here, a non-repeating group
with a BRAZERO / BRAMINZERO prefix stores 0. */
8258 if (ket != OP_KET || bra != OP_BRA)
8259 {
8260 if (ket != OP_KET)
8261 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8262 else
8263 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
8264 stacksize++;
8265 }
8266
8267 if (offset != 0)
8268 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
8269
/* A 0 is stored in the alternative slot for the first alternative (not for
OP_ONCE). */
8270 if (has_alternatives)
8271 {
8272 if (opcode != OP_ONCE)
8273 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
8274 if (ket != OP_KETRMAX)
8275 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
8276 }
8277
8278 /* Must be after the matchingpath label. */
8279 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
8280 {
8281 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
8282 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
8283 }
8284
8285 if (ket == OP_KETRMAX)
8286 {
8287 if (repeat_type != 0)
8288 {
8289 if (has_alternatives)
8290 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
8291 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
8292 JUMPTO(SLJIT_NOT_ZERO, rmax_label);
8293 /* Drop STR_PTR for greedy plus quantifier. */
8294 if (opcode != OP_ONCE)
8295 free_stack(common, 1);
8296 }
8297 else if (opcode == OP_ONCE || opcode >= OP_SBRA)
8298 {
8299 if (has_alternatives)
8300 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
8301 /* Checking zero-length iteration. */
8302 if (opcode != OP_ONCE)
8303 {
8304 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
8305 /* Drop STR_PTR for greedy plus quantifier. */
8306 if (bra != OP_BRAZERO)
8307 free_stack(common, 1);
8308 }
8309 else
8310 /* TMP2 must contain the starting STR_PTR. */
8311 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
8312 }
8313 else
8314 JUMPTO(SLJIT_JUMP, rmax_label);
8315 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
8316 }
8317
8318 if (repeat_type == OP_EXACT)
8319 {
8320 count_match(common);
8321 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
8322 JUMPTO(SLJIT_NOT_ZERO, rmax_label);
8323 }
8324 else if (repeat_type == OP_UPTO)
8325 {
8326 /* We need to preserve the counter. */
8327 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
8328 allocate_stack(common, 1);
8329 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8330 }
8331
8332 if (bra == OP_BRAZERO)
8333 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
8334
8335 if (bra == OP_BRAMINZERO)
8336 {
8337 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
8338 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
8339 if (braminzero != NULL)
8340 {
8341 JUMPHERE(braminzero);
8342 /* We need to release the end pointer to perform the
8343 backtrack for the zero-length iteration. When
8344 framesize is < 0, OP_ONCE will do the release itself. */
8345 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
8346 {
8347 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8348 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
8349 }
8350 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
8351 free_stack(common, 1);
8352 }
8353 /* Continue to the normal backtrack. */
8354 }
8355
8356 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
8357 count_match(common);
8358
8359 /* Skip the other alternatives. */
8360 while (*cc == OP_ALT)
8361 cc += GET(cc, 1);
8362 cc += 1 + LINK_SIZE;
8363
8364 if (opcode == OP_ONCE)
8365 {
8366 /* We temporarily encode the needs_control_head in the lowest bit.
8367 Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
8368 the same value for small signed numbers (including negative numbers). */
8369 BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
8370 }
8371 return cc + repeat_length;
8372 }
8373
/* Emits the matching-path code for a bracket of the OP_BRAPOS family
(OP_BRAPOS, OP_SBRAPOS, OP_CBRAPOS, OP_SCBRAPOS), optionally preceded by
OP_BRAPOSZERO.

common - shared compiler state
cc     - points at the first opcode of the group (OP_BRAPOSZERO when present)
parent - not referenced directly in this function (presumably consumed by
         PUSH_BACKTRACK - TODO confirm)

Every alternative is compiled in turn. The code of an alternative ends with a
jump back to the common loop label, and is followed immediately by its own
backtracking code, which falls through into the next alternative. The stack
size and framesize used are stored in the bracketpos_backtrack accessed
through BACKTRACK_AS().

Returns the position after the closing OP_KETRPOS, or NULL when sljit reports
a compiler error. */
compile_bracketpos_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)8374 static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
8375 {
8376 DEFINE_COMPILER;
8377 backtrack_common *backtrack;
8378 PCRE2_UCHAR opcode;
8379 int private_data_ptr;
8380 int cbraprivptr = 0;
8381 BOOL needs_control_head;
8382 int framesize;
8383 int stacksize;
8384 int offset = 0;
8385 BOOL zero = FALSE;
8386 PCRE2_SPTR ccbegin = NULL;
8387 int stack; /* Also contains the offset of control head. */
8388 struct sljit_label *loop = NULL;
8389 struct jump_list *emptymatch = NULL;
8390
8391 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
8392 if (*cc == OP_BRAPOSZERO)
8393 {
8394 zero = TRUE;
8395 cc++;
8396 }
8397
8398 opcode = *cc;
8399 private_data_ptr = PRIVATE_DATA(cc);
8400 SLJIT_ASSERT(private_data_ptr != 0);
8401 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
/* ccbegin is set to the first opcode inside the group; capturing forms also
skip the bracket number and fetch its OVECTOR_PRIV slot. */
8402 switch(opcode)
8403 {
8404 case OP_BRAPOS:
8405 case OP_SBRAPOS:
8406 ccbegin = cc + 1 + LINK_SIZE;
8407 break;
8408
8409 case OP_CBRAPOS:
8410 case OP_SCBRAPOS:
8411 offset = GET2(cc, 1 + LINK_SIZE);
8412 /* This case cannot be optimized in the same way as
8413 normal capturing brackets. */
8414 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
8415 cbraprivptr = OVECTOR_PRIV(offset);
8416 offset <<= 1;
8417 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
8418 break;
8419
8420 default:
8421 SLJIT_ASSERT_STOP();
8422 break;
8423 }
8424
8425 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
8426 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
8427 if (framesize < 0)
8428 {
/* Negative framesize: no init_frame() call is made. Slots, in order: the
saved OVECTOR pair (plus capture_last when used) or the current STR_PTR,
then the optional control head, then - unless zero is set - a flag that is
1 until an iteration has completed. */
8429 if (offset != 0)
8430 {
8431 stacksize = 2;
8432 if (common->capture_last_ptr != 0)
8433 stacksize++;
8434 }
8435 else
8436 stacksize = 1;
8437
8438 if (needs_control_head)
8439 stacksize++;
8440 if (!zero)
8441 stacksize++;
8442
8443 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
8444 allocate_stack(common, stacksize);
8445 if (framesize == no_frame)
8446 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
8447
8448 stack = 0;
8449 if (offset != 0)
8450 {
8451 stack = 2;
8452 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
8453 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
8454 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
8455 if (common->capture_last_ptr != 0)
8456 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
8457 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
8458 if (needs_control_head)
8459 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8460 if (common->capture_last_ptr != 0)
8461 {
8462 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
8463 stack = 3;
8464 }
8465 }
8466 else
8467 {
8468 if (needs_control_head)
8469 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8470 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8471 stack = 1;
8472 }
8473
/* stack is bumped only to address the flag slot, then stepped back so that
it is left holding the index of the control head slot. */
8474 if (needs_control_head)
8475 stack++;
8476 if (!zero)
8477 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
8478 if (needs_control_head)
8479 {
8480 stack--;
8481 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
8482 }
8483 }
8484 else
8485 {
/* framesize >= 0: the flag (unless zero), the control head, the STR_PTR (for
non-capturing forms) and the previous private_data_ptr value are stored
first, and init_frame() fills the rest. */
8486 stacksize = framesize + 1;
8487 if (!zero)
8488 stacksize++;
8489 if (needs_control_head)
8490 stacksize++;
8491 if (offset == 0)
8492 stacksize++;
8493 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
8494
8495 allocate_stack(common, stacksize);
8496 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8497 if (needs_control_head)
8498 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8499 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
8500
8501 stack = 0;
8502 if (!zero)
8503 {
8504 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
8505 stack = 1;
8506 }
8507 if (needs_control_head)
8508 {
8509 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
8510 stack++;
8511 }
8512 if (offset == 0)
8513 {
8514 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
8515 stack++;
8516 }
8517 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
8518 init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
/* Step stack back to the control head slot index used inside the loop. */
8519 stack -= 1 + (offset == 0);
8520 }
8521
8522 if (offset != 0)
8523 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
8524
/* One pass of this C loop per alternative; every compiled alternative jumps
back to the same emitted loop label after a successful iteration. */
8525 loop = LABEL();
8526 while (*cc != OP_KETRPOS)
8527 {
8528 backtrack->top = NULL;
8529 backtrack->topbacktracks = NULL;
8530 cc += GET(cc, 1);
8531
8532 compile_matchingpath(common, ccbegin, cc, backtrack);
8533 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8534 return NULL;
8535
8536 if (framesize < 0)
8537 {
8538 if (framesize == no_frame)
8539 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8540
/* The iteration matched: record the new position. TMP1 keeps the position
at which this iteration started, for the empty-match test below. */
8541 if (offset != 0)
8542 {
8543 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
8544 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
8545 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
8546 if (common->capture_last_ptr != 0)
8547 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
8548 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
8549 }
8550 else
8551 {
8552 if (opcode == OP_SBRAPOS)
8553 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8554 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8555 }
8556
8557 /* Even if the match is empty, we need to reset the control head. */
8558 if (needs_control_head)
8559 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
8560
/* OP_SBRAPOS / OP_SCBRAPOS leave the loop when the iteration did not move
STR_PTR. */
8561 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
8562 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
8563
/* Clear the flag slot: at least one iteration has completed. */
8564 if (!zero)
8565 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
8566 }
8567 else
8568 {
8569 if (offset != 0)
8570 {
8571 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
8572 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
8573 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
8574 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
8575 if (common->capture_last_ptr != 0)
8576 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
8577 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
8578 }
8579 else
8580 {
8581 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8582 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
8583 if (opcode == OP_SBRAPOS)
8584 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
8585 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
8586 }
8587
8588 /* Even if the match is empty, we need to reset the control head. */
8589 if (needs_control_head)
8590 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
8591
8592 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
8593 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
8594
8595 if (!zero)
8596 {
/* NOTE(review): this is the framesize >= 0 branch of the enclosing if, so
the framesize < 0 arm below cannot be taken. */
8597 if (framesize < 0)
8598 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
8599 else
8600 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
8601 }
8602 }
8603
8604 JUMPTO(SLJIT_JUMP, loop);
8605 flush_stubs(common);
8606
/* The backtracking code of this alternative is emitted inline, right after
the jump back to the loop; it restores STR_PTR before the next alternative
(or the exit code) runs. */
8607 compile_backtrackingpath(common, backtrack->top);
8608 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8609 return NULL;
8610 set_jumps(backtrack->topbacktracks, LABEL());
8611
8612 if (framesize < 0)
8613 {
8614 if (offset != 0)
8615 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
8616 else
8617 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8618 }
8619 else
8620 {
8621 if (offset != 0)
8622 {
8623 /* Last alternative. */
8624 if (*cc == OP_KETRPOS)
8625 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8626 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
8627 }
8628 else
8629 {
8630 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8631 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
8632 }
8633 }
8634
8635 if (*cc == OP_KETRPOS)
8636 break;
8637 ccbegin = cc + 1 + LINK_SIZE;
8638 }
8639
8640 /* We don't have to restore the control head in case of a failed match. */
8641
/* Without OP_BRAPOSZERO the group must have completed at least one
iteration: if the flag slot is still non-zero, take the backtrack. */
8642 backtrack->topbacktracks = NULL;
8643 if (!zero)
8644 {
8645 if (framesize < 0)
8646 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
8647 else /* TMP2 is set to [private_data_ptr] above. */
8648 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
8649 }
8650
8651 /* None of them matched. */
8652 set_jumps(emptymatch, LABEL());
8653 count_match(common);
8654 return cc + 1 + LINK_SIZE;
8655 }
8656
get_iterator_parameters(compiler_common * common,PCRE2_SPTR cc,PCRE2_UCHAR * opcode,PCRE2_UCHAR * type,sljit_u32 * max,sljit_u32 * exact,PCRE2_SPTR * end)8657 static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
8658 {
8659 int class_len;
8660
8661 *opcode = *cc;
8662 *exact = 0;
8663
8664 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
8665 {
8666 cc++;
8667 *type = OP_CHAR;
8668 }
8669 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
8670 {
8671 cc++;
8672 *type = OP_CHARI;
8673 *opcode -= OP_STARI - OP_STAR;
8674 }
8675 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
8676 {
8677 cc++;
8678 *type = OP_NOT;
8679 *opcode -= OP_NOTSTAR - OP_STAR;
8680 }
8681 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
8682 {
8683 cc++;
8684 *type = OP_NOTI;
8685 *opcode -= OP_NOTSTARI - OP_STAR;
8686 }
8687 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
8688 {
8689 cc++;
8690 *opcode -= OP_TYPESTAR - OP_STAR;
8691 *type = OP_END;
8692 }
8693 else
8694 {
8695 SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
8696 *type = *opcode;
8697 cc++;
8698 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
8699 *opcode = cc[class_len - 1];
8700
8701 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
8702 {
8703 *opcode -= OP_CRSTAR - OP_STAR;
8704 *end = cc + class_len;
8705
8706 if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
8707 {
8708 *exact = 1;
8709 *opcode -= OP_PLUS - OP_STAR;
8710 }
8711 }
8712 else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
8713 {
8714 *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
8715 *end = cc + class_len;
8716
8717 if (*opcode == OP_POSPLUS)
8718 {
8719 *exact = 1;
8720 *opcode = OP_POSSTAR;
8721 }
8722 }
8723 else
8724 {
8725 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
8726 *max = GET2(cc, (class_len + IMM2_SIZE));
8727 *exact = GET2(cc, class_len);
8728
8729 if (*max == 0)
8730 {
8731 if (*opcode == OP_CRPOSRANGE)
8732 *opcode = OP_POSSTAR;
8733 else
8734 *opcode -= OP_CRRANGE - OP_STAR;
8735 }
8736 else
8737 {
8738 *max -= *exact;
8739 if (*max == 0)
8740 *opcode = OP_EXACT;
8741 else if (*max == 1)
8742 {
8743 if (*opcode == OP_CRPOSRANGE)
8744 *opcode = OP_POSQUERY;
8745 else
8746 *opcode -= OP_CRRANGE - OP_QUERY;
8747 }
8748 else
8749 {
8750 if (*opcode == OP_CRPOSRANGE)
8751 *opcode = OP_POSUPTO;
8752 else
8753 *opcode -= OP_CRRANGE - OP_UPTO;
8754 }
8755 }
8756 *end = cc + class_len + 2 * IMM2_SIZE;
8757 }
8758 return cc;
8759 }
8760
8761 switch(*opcode)
8762 {
8763 case OP_EXACT:
8764 *exact = GET2(cc, 0);
8765 cc += IMM2_SIZE;
8766 break;
8767
8768 case OP_PLUS:
8769 case OP_MINPLUS:
8770 *exact = 1;
8771 *opcode -= OP_PLUS - OP_STAR;
8772 break;
8773
8774 case OP_POSPLUS:
8775 *exact = 1;
8776 *opcode = OP_POSSTAR;
8777 break;
8778
8779 case OP_UPTO:
8780 case OP_MINUPTO:
8781 case OP_POSUPTO:
8782 *max = GET2(cc, 0);
8783 cc += IMM2_SIZE;
8784 break;
8785 }
8786
8787 if (*type == OP_END)
8788 {
8789 *type = *cc;
8790 *end = next_opcode(common, cc);
8791 cc++;
8792 return cc;
8793 }
8794
8795 *end = cc + 1;
8796 #ifdef SUPPORT_UNICODE
8797 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
8798 #endif
8799 return cc;
8800 }
8801
/* Generates the matching path of a single-item iterator (a repeated
character, negated character, character type or class).

The iterator is decoded by get_iterator_parameters(); the mandatory repeats
(exact) are emitted first, followed by the opcode-specific code for the
optional part.

Arguments:
  common   the compiler state
  cc       points to the iterator opcode
  parent   not referenced directly in this function; presumably consumed by
           the PUSH_BACKTRACK macro - confirm against its definition

Returns:   the end pointer computed by get_iterator_parameters(), moved two
           code units further when a following OP_CHAR/OP_CHARI item is
           absorbed by the character position optimisation below.
           PUSH_BACKTRACK is passed NULL as its last argument, presumably
           the value returned when it fails - confirm against the macro. */
static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
  DEFINE_COMPILER;
  backtrack_common *backtrack;
  PCRE2_UCHAR opcode;
  PCRE2_UCHAR type;
  sljit_u32 max = 0, exact;
  BOOL fast_fail;
  sljit_s32 fast_str_ptr;
  BOOL charpos_enabled;
  PCRE2_UCHAR charpos_char;
  unsigned int charpos_othercasebit;
  PCRE2_SPTR end;
  jump_list *no_match = NULL;
  jump_list *no_char1_match = NULL;
  struct sljit_jump *jump = NULL;
  struct sljit_label *label;
  int private_data_ptr = PRIVATE_DATA(cc);
  /* The two state words of the iterator live on the backtrack stack
  (STACK(0), STACK(1)) when no private data is assigned to this opcode, and
  in two consecutive words at private_data_ptr otherwise. */
  int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
  int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
  int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
  int tmp_base, tmp_offset;

  PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);

  fast_str_ptr = PRIVATE_DATA(cc + 1);
  fast_fail = TRUE;

  SLJIT_ASSERT(common->fast_forward_bc_ptr == NULL || fast_str_ptr == 0 || cc == common->fast_forward_bc_ptr);

  /* The iterator selected as fast_forward_bc_ptr never emits the fast fail
  check; other iterators keep fast_str_ptr only when fast_fail_start_ptr is
  non-zero. */
  if (cc == common->fast_forward_bc_ptr)
    fast_fail = FALSE;
  else if (common->fast_fail_start_ptr == 0)
    fast_str_ptr = 0;

  SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || fast_str_ptr == 0
    || (fast_str_ptr >= common->fast_fail_start_ptr && fast_str_ptr <= common->fast_fail_end_ptr));

  cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

  /* Scratch location for loop counters and saved positions: the TMP3
  register, except for OP_EXTUNI where the POSSESSIVE0 frame slot is used
  (presumably because matching OP_EXTUNI does not preserve TMP3 - confirm). */
  if (type != OP_EXTUNI)
  {
    tmp_base = TMP3;
    tmp_offset = 0;
  }
  else
  {
    tmp_base = SLJIT_MEM1(SLJIT_SP);
    tmp_offset = POSSESSIVE0;
  }

  /* Backtrack at once when STR_PTR is not beyond the position recorded in
  the fast_str_ptr slot. */
  if (fast_fail && fast_str_ptr != 0)
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr));

  /* Handle fixed part first. */
  if (exact > 1)
  {
    SLJIT_ASSERT(fast_str_ptr == 0);
    if (common->mode == PCRE2_JIT_COMPLETE
#ifdef SUPPORT_UNICODE
      && !common->utf
#endif
      )
    {
      /* One length check against STR_END covers all 'exact' code units, so
      the per-item matcher is called with its last argument FALSE. */
      OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
      label = LABEL();
      compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
      OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
      JUMPTO(SLJIT_NOT_ZERO, label);
    }
    else
    {
      /* Same countdown loop, but the per-item matcher is called with its
      last argument TRUE. */
      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
      label = LABEL();
      compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
      OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
      JUMPTO(SLJIT_NOT_ZERO, label);
    }
  }
  else if (exact == 1)
    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);

  /* Optional part, selected by the normalised repeat opcode. */
  switch(opcode)
  {
    case OP_STAR:
    case OP_UPTO:
      SLJIT_ASSERT(fast_str_ptr == 0 || opcode == OP_STAR);

      if (type == OP_ANYNL || type == OP_EXTUNI)
      {
        /* Every position reached is pushed on the backtrack stack; the
        entry pushed first has a zero in its second word. */
        SLJIT_ASSERT(private_data_ptr == 0);
        SLJIT_ASSERT(fast_str_ptr == 0);

        allocate_stack(common, 2);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

        if (opcode == OP_UPTO)
          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);

        label = LABEL();
        compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
        if (opcode == OP_UPTO)
        {
          /* Count down in POSSESSIVE0; leave the loop when it reaches zero. */
          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
          OP2(SLJIT_SUB | SLJIT_SET_E, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
          jump = JUMP(SLJIT_ZERO);
          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
        }

        /* We cannot use TMP3 because of this allocate_stack. */
        allocate_stack(common, 1);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
        JUMPTO(SLJIT_JUMP, label);
        if (jump != NULL)
          JUMPHERE(jump);
      }
      else
      {
        charpos_enabled = FALSE;
        charpos_char = 0;
        charpos_othercasebit = 0;

        /* Character position optimisation: considered when the repeated
        item is not a literal character and the iterator is directly
        followed by OP_CHAR or OP_CHARI. */
        if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
        {
          charpos_enabled = TRUE;
#ifdef SUPPORT_UNICODE
          charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
#endif
          if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
          {
            charpos_othercasebit = char_get_othercase_bit(common, end + 1);
            if (charpos_othercasebit == 0)
              charpos_enabled = FALSE;
          }

          if (charpos_enabled)
          {
            charpos_char = end[1];
            /* Consume the OP_CHAR opcode. */
            end += 2;
#if PCRE2_CODE_UNIT_WIDTH == 8
            SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
            SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
            if ((charpos_othercasebit & 0x100) != 0)
              charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
#endif
            /* With the other-case bit set in both the reference character
            and the subject code unit, one comparison covers both cases. */
            if (charpos_othercasebit != 0)
              charpos_char |= charpos_othercasebit;

            /* Recorded in the backtrack entry (presumably for the
            backtracking path generator - confirm). */
            BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
            BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
            BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
          }
        }

        if (charpos_enabled)
        {
          if (opcode == OP_UPTO)
            OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);

          /* Search the first instance of charpos_char. */
          /* The initial jump enters the loop at the comparison, skipping
          the item match of the first round. */
          jump = JUMP(SLJIT_JUMP);
          label = LABEL();
          if (opcode == OP_UPTO)
          {
            OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
            add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
          }
          compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
          if (fast_str_ptr != 0)
            OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
          JUMPHERE(jump);

          detect_partial_match(common, &backtrack->topbacktracks);
          OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
          if (charpos_othercasebit != 0)
            OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
          CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);

          /* First instance found: both state words start at this position. */
          if (private_data_ptr == 0)
            allocate_stack(common, 2);
          OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
          OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
          if (opcode == OP_UPTO)
          {
            OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
            add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
          }

          /* Search the last instance of charpos_char. */
          label = LABEL();
          compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
          if (fast_str_ptr != 0)
            OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
          detect_partial_match(common, &no_match);
          OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
          if (charpos_othercasebit != 0)
            OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
          if (opcode == OP_STAR)
          {
            /* offset0 is updated only when the code unit at STR_PTR equals
            charpos_char. */
            CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
            OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
          }
          else
          {
            /* OP_UPTO must fall through to the counter update below, so
            the store is skipped with a forward jump instead. */
            jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
            OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
            JUMPHERE(jump);
          }

          if (opcode == OP_UPTO)
          {
            OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
            JUMPTO(SLJIT_NOT_ZERO, label);
          }
          else
            JUMPTO(SLJIT_JUMP, label);

          /* Continue one code unit after the last recorded instance, i.e.
          past the absorbed character, and record that position. */
          set_jumps(no_match, LABEL());
          OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
          OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
        }
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
        else if (common->utf)
        {
          /* UTF mode: offset0 is updated after every successful item and
          STR_PTR is reloaded from it when an item fails. */
          if (private_data_ptr == 0)
            allocate_stack(common, 2);

          OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
          OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);

          if (opcode == OP_UPTO)
            OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);

          label = LABEL();
          compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
          OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);

          if (opcode == OP_UPTO)
          {
            OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
            JUMPTO(SLJIT_NOT_ZERO, label);
          }
          else
            JUMPTO(SLJIT_JUMP, label);

          set_jumps(no_match, LABEL());
          OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
          if (fast_str_ptr != 0)
            OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
        }
#endif
        else
        {
          /* offset1 holds the start position; offset0 receives the final
          position once the loop is left. */
          if (private_data_ptr == 0)
            allocate_stack(common, 2);

          OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
          if (opcode == OP_UPTO)
            OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);

          label = LABEL();
          detect_partial_match(common, &no_match);
          compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
          if (opcode == OP_UPTO)
          {
            OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
            JUMPTO(SLJIT_NOT_ZERO, label);
            /* Counter exhausted: this add cancels the subtraction at the
            no_char1_match label that this path falls into. */
            OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
          }
          else
            JUMPTO(SLJIT_JUMP, label);

          /* A failed item steps STR_PTR back by one code unit (presumably
          the matcher has already advanced it - confirm). */
          set_jumps(no_char1_match, LABEL());
          OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
          set_jumps(no_match, LABEL());
          OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
          if (fast_str_ptr != 0)
            OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
        }
      }
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
      break;

    case OP_MINSTAR:
      /* Only the current position is saved; no item is matched here. */
      if (private_data_ptr == 0)
        allocate_stack(common, 1);
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
      if (fast_str_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
      break;

    case OP_MINUPTO:
      /* Saves the current position and a counter initialised to max + 1;
      no item is matched here. */
      SLJIT_ASSERT(fast_str_ptr == 0);
      if (private_data_ptr == 0)
        allocate_stack(common, 2);
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
      break;

    case OP_QUERY:
    case OP_MINQUERY:
      /* The position is saved for both; only OP_QUERY tries the item here. */
      SLJIT_ASSERT(fast_str_ptr == 0);
      if (private_data_ptr == 0)
        allocate_stack(common, 1);
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      if (opcode == OP_QUERY)
        compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
      break;

    case OP_EXACT:
      /* Fully handled by the fixed part above. */
      break;

    case OP_POSSTAR:
      /* Possessive: no stack allocation and no matchingpath label are
      emitted in the possessive cases. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
      if (common->utf)
      {
        /* The last good position is kept in the scratch location and
        restored when an item fails. */
        OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
        label = LABEL();
        compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
        OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
        JUMPTO(SLJIT_JUMP, label);
        set_jumps(no_match, LABEL());
        OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
        if (fast_str_ptr != 0)
          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
        break;
      }
#endif
      label = LABEL();
      detect_partial_match(common, &no_match);
      compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
      JUMPTO(SLJIT_JUMP, label);
      set_jumps(no_char1_match, LABEL());
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      set_jumps(no_match, LABEL());
      if (fast_str_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
      break;

    case OP_POSUPTO:
      SLJIT_ASSERT(fast_str_ptr == 0);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
      if (common->utf)
      {
        /* POSSESSIVE1 keeps the last good position; the scratch location
        holds the countdown. */
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
        OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
        label = LABEL();
        compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
        OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
        JUMPTO(SLJIT_NOT_ZERO, label);
        set_jumps(no_match, LABEL());
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
        break;
      }
#endif
      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
      label = LABEL();
      detect_partial_match(common, &no_match);
      compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
      OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
      JUMPTO(SLJIT_NOT_ZERO, label);
      /* Counter exhausted: cancels the subtraction at the label below. */
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      set_jumps(no_char1_match, LABEL());
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      set_jumps(no_match, LABEL());
      break;

    case OP_POSQUERY:
      /* Try the item once; STR_PTR is reloaded from the scratch location,
      which holds the new position on success and the old one on failure. */
      SLJIT_ASSERT(fast_str_ptr == 0);
      OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
      compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
      OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
      set_jumps(no_match, LABEL());
      OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
      break;

    default:
      SLJIT_ASSERT_STOP();
      break;
  }

  count_match(common);
  return end;
}
9196
compile_fail_accept_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9197 static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9198 {
9199 DEFINE_COMPILER;
9200 backtrack_common *backtrack;
9201
9202 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9203
9204 if (*cc == OP_FAIL)
9205 {
9206 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
9207 return cc + 1;
9208 }
9209
9210 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
9211 {
9212 /* No need to check notempty conditions. */
9213 if (common->accept_label == NULL)
9214 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
9215 else
9216 JUMPTO(SLJIT_JUMP, common->accept_label);
9217 return cc + 1;
9218 }
9219
9220 if (common->accept_label == NULL)
9221 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
9222 else
9223 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
9224 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9225 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
9226 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
9227 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO));
9228 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
9229 if (common->accept_label == NULL)
9230 add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
9231 else
9232 JUMPTO(SLJIT_ZERO, common->accept_label);
9233 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
9234 if (common->accept_label == NULL)
9235 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
9236 else
9237 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
9238 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
9239 return cc + 1;
9240 }
9241
compile_close_matchingpath(compiler_common * common,PCRE2_SPTR cc)9242 static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
9243 {
9244 DEFINE_COMPILER;
9245 int offset = GET2(cc, 1);
9246 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
9247
9248 /* Data will be discarded anyway... */
9249 if (common->currententry != NULL)
9250 return cc + 1 + IMM2_SIZE;
9251
9252 if (!optimized_cbracket)
9253 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
9254 offset <<= 1;
9255 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
9256 if (!optimized_cbracket)
9257 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
9258 return cc + 1 + IMM2_SIZE;
9259 }
9260
compile_control_verb_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9261 static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9262 {
9263 DEFINE_COMPILER;
9264 backtrack_common *backtrack;
9265 PCRE2_UCHAR opcode = *cc;
9266 PCRE2_SPTR ccend = cc + 1;
9267
9268 if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
9269 ccend += 2 + cc[1];
9270
9271 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9272
9273 if (opcode == OP_SKIP)
9274 {
9275 allocate_stack(common, 1);
9276 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9277 return ccend;
9278 }
9279
9280 if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
9281 {
9282 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9283 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
9284 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
9285 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
9286 }
9287
9288 return ccend;
9289 }
9290
9291 static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
9292
compile_then_trap_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)9293 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
9294 {
9295 DEFINE_COMPILER;
9296 backtrack_common *backtrack;
9297 BOOL needs_control_head;
9298 int size;
9299
9300 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
9301 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
9302 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
9303 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
9304 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
9305
9306 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
9307 size = 3 + (size < 0 ? 0 : size);
9308
9309 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9310 allocate_stack(common, size);
9311 if (size > 3)
9312 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
9313 else
9314 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
9315 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
9316 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
9317 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
9318
9319 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
9320 if (size >= 0)
9321 init_frame(common, cc, ccend, size - 1, 0, FALSE);
9322 }
9323
/* Generates the matching path for the opcode sequence [cc, ccend).

Each opcode is dispatched to its specialised generator, which returns the
address of the next opcode to process. Generation stops early when a
generator returns NULL or when an unexpected opcode is met. When
common->then_offsets marks the starting offset (and the pattern has a THEN
verb), the sequence is enclosed between two then-trap backtrack entries. */
static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
{
  DEFINE_COMPILER;
  backtrack_common *backtrack;
  jump_list **backtracks;
  then_trap_backtrack *save_then_trap = NULL;
  PCRE2_UCHAR next_op;
  BOOL has_then_trap;

  SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));

  has_then_trap = (common->has_then && common->then_offsets[cc - common->start] != 0);
  if (has_then_trap)
  {
    SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
    save_then_trap = common->then_trap;
    /* Tail item on backtrack. */
    compile_then_trap_matchingpath(common, cc, ccend, parent);
  }

  while (cc < ccend)
  {
    /* Failures of items that have no backtrack entry of their own go to
    the most recently pushed entry of parent, or to parent itself when
    nothing has been pushed yet. */
    backtracks = (parent->top != NULL) ? &parent->top->nextbacktracks : &parent->topbacktracks;

    switch(*cc)
    {
      /* Zero-width assertions. */
      case OP_SOD: case OP_SOM:
      case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
      case OP_EODN: case OP_EOD:
      case OP_DOLL: case OP_DOLLM:
      case OP_CIRC: case OP_CIRCM:
      case OP_REVERSE:
        cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, backtracks);
        break;

      /* Single, non-repeated items. */
      case OP_NOT_DIGIT: case OP_DIGIT:
      case OP_NOT_WHITESPACE: case OP_WHITESPACE:
      case OP_NOT_WORDCHAR: case OP_WORDCHAR:
      case OP_ANY: case OP_ALLANY: case OP_ANYBYTE:
      case OP_NOTPROP: case OP_PROP:
      case OP_ANYNL:
      case OP_NOT_HSPACE: case OP_HSPACE:
      case OP_NOT_VSPACE: case OP_VSPACE:
      case OP_EXTUNI:
      case OP_NOT: case OP_NOTI:
        cc = compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
        break;

      case OP_SET_SOM:
        /* The old OVECTOR(0) value is saved on the backtrack stack. */
        PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
        allocate_stack(common, 1);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
        cc++;
        break;

      case OP_CHAR: case OP_CHARI:
        if (common->mode == PCRE2_JIT_COMPLETE)
          cc = compile_charn_matchingpath(common, cc, ccend, backtracks);
        else
          cc = compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
        break;

      /* Repeated character. */
      case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
      case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
      case OP_EXACT:
      case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
      /* Repeated character, caseless. */
      case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
      case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
      case OP_EXACTI:
      case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
      /* Repeated negated character. */
      case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
      case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
      case OP_NOTEXACT:
      case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
      /* Repeated negated character, caseless. */
      case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
      case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
      case OP_NOTEXACTI:
      case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
      /* Repeated character type. */
      case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS:
      case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEUPTO: case OP_TYPEMINUPTO:
      case OP_TYPEEXACT:
      case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
        cc = compile_iterator_matchingpath(common, cc, parent);
        break;

      case OP_CLASS: case OP_NCLASS:
        /* A class is an iterator only when a repeat opcode follows it. */
        next_op = cc[1 + (32 / sizeof(PCRE2_UCHAR))];
        if (next_op >= OP_CRSTAR && next_op <= OP_CRPOSRANGE)
          cc = compile_iterator_matchingpath(common, cc, parent);
        else
          cc = compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
        break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
      case OP_XCLASS:
        next_op = *(cc + GET(cc, 1));
        if (next_op >= OP_CRSTAR && next_op <= OP_CRPOSRANGE)
          cc = compile_iterator_matchingpath(common, cc, parent);
        else
          cc = compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
        break;
#endif

      case OP_REF: case OP_REFI:
        next_op = cc[1 + IMM2_SIZE];
        if (next_op >= OP_CRSTAR && next_op <= OP_CRPOSRANGE)
          cc = compile_ref_iterator_matchingpath(common, cc, parent);
        else
        {
          compile_ref_matchingpath(common, cc, backtracks, TRUE, FALSE);
          cc += 1 + IMM2_SIZE;
        }
        break;

      case OP_DNREF: case OP_DNREFI:
        next_op = cc[1 + 2 * IMM2_SIZE];
        if (next_op >= OP_CRSTAR && next_op <= OP_CRPOSRANGE)
          cc = compile_ref_iterator_matchingpath(common, cc, parent);
        else
        {
          compile_dnref_search(common, cc, backtracks);
          compile_ref_matchingpath(common, cc, backtracks, TRUE, FALSE);
          cc += 1 + 2 * IMM2_SIZE;
        }
        break;

      case OP_RECURSE:
        cc = compile_recurse_matchingpath(common, cc, parent);
        break;

      case OP_CALLOUT: case OP_CALLOUT_STR:
        cc = compile_callout_matchingpath(common, cc, parent);
        break;

      case OP_ASSERT: case OP_ASSERT_NOT:
      case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
        PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
        cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
        break;

      case OP_BRAMINZERO:
        /* The group itself is skipped on the matching path; only the
        state needed by the backtracking path is saved. */
        PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
        cc = bracketend(cc + 1);
        if (*(cc - 1 - LINK_SIZE) == OP_KETRMIN)
        {
          allocate_stack(common, 2);
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
        }
        else
        {
          allocate_stack(common, 1);
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
        }
        BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
        count_match(common);
        break;

      case OP_ONCE: case OP_ONCE_NC:
      case OP_BRA: case OP_CBRA: case OP_COND:
      case OP_SBRA: case OP_SCBRA: case OP_SCOND:
        cc = compile_bracket_matchingpath(common, cc, parent);
        break;

      case OP_BRAZERO:
        /* OP_BRAZERO may precede either an assertion or a bracket. */
        if (cc[1] <= OP_ASSERTBACK_NOT)
        {
          PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
          cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
        }
        else
          cc = compile_bracket_matchingpath(common, cc, parent);
        break;

      case OP_BRAPOS: case OP_CBRAPOS:
      case OP_SBRAPOS: case OP_SCBRAPOS:
      case OP_BRAPOSZERO:
        cc = compile_bracketpos_matchingpath(common, cc, parent);
        break;

      case OP_MARK:
        /* The previous mark is saved on the backtrack stack and the new
        one (cc + 2) is stored both in the frame and in the arguments.
        With has_skip_arg, four more words link a type_mark record into
        the control head chain. */
        PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
        SLJIT_ASSERT(common->mark_ptr != 0);
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
        allocate_stack(common, common->has_skip_arg ? 5 : 1);
        OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
        if (common->has_skip_arg)
        {
          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
        }
        cc += 1 + 2 + cc[1];
        break;

      case OP_PRUNE: case OP_PRUNE_ARG:
      case OP_SKIP: case OP_SKIP_ARG:
      case OP_THEN: case OP_THEN_ARG:
      case OP_COMMIT:
        cc = compile_control_verb_matchingpath(common, cc, parent);
        break;

      case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT:
        cc = compile_fail_accept_matchingpath(common, cc, parent);
        break;

      case OP_CLOSE:
        cc = compile_close_matchingpath(common, cc);
        break;

      case OP_SKIPZERO:
        cc = bracketend(cc + 1);
        break;

      default:
        SLJIT_ASSERT_STOP();
        return;
    }

    /* A generator reports failure by returning NULL. */
    if (cc == NULL)
      return;
  }

  if (has_then_trap)
  {
    /* Head item on backtrack. */
    PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
    BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
    BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
    common->then_trap = save_then_trap;
  }
  SLJIT_ASSERT(cc == ccend);
}
9635
/* The helper macros used by the matching path generators above are removed
here. */
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Calls compile_backtrackingpath() for 'current' and leaves the enclosing
function when sljit_get_compiler_error() reports a non-zero value. The
expansion relies on 'common' and 'compiler' being in scope at the point of
use and ends in a bare "return;", so it only fits functions returning void. */
#define COMPILE_BACKTRACKINGPATH(current) \
  do \
    { \
    compile_backtrackingpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Casts the 'current' variable of the enclosing scope to a pointer to the
given backtrack structure type. */
#define CURRENT_AS(type) ((type *)current)
9650
/* Generates the backtracking path of a single character iterator.

Arguments:
  common    the JIT compiler state
  current   backtrack record of the iterator (used as char_iterator_backtrack)

The iterator keeps two values, either on the backtrack stack (when the opcode
has no private data) or in private data slots addressed through SLJIT_SP.
Slot 0 holds a saved subject position. Slot 1 is the position the greedy
cases must not step back beyond, or the remaining counter of OP_MINUPTO. */

static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
char_iterator_backtrack *iter = CURRENT_AS(char_iterator_backtrack);
PCRE2_SPTR cc = current->cc;
PCRE2_UCHAR opcode;
PCRE2_UCHAR type;
sljit_u32 max = 0, exact;
struct sljit_label *rescan = NULL;
struct sljit_jump *exhausted = NULL;
jump_list *no_match = NULL;
PCRE2_SPTR end;
int private_data_ptr = PRIVATE_DATA(cc);
BOOL on_stack = (private_data_ptr == 0);
int base, offset0, offset1;
/* Number of stack words to drop once the opcode specific code is emitted. */
int release = 0;

if (on_stack)
  {
  base = SLJIT_MEM1(STACK_TOP);
  offset0 = STACK(0);
  offset1 = STACK(1);
  }
else
  {
  base = SLJIT_MEM1(SLJIT_SP);
  offset0 = private_data_ptr;
  offset1 = private_data_ptr + (int)sizeof(sljit_sw);
  }

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

switch(opcode)
  {
  case OP_STAR:
  case OP_UPTO:
  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    /* These types always keep their saved positions on the stack: pop one
    and resume the matching path unless the popped value is zero. */
    SLJIT_ASSERT(private_data_ptr == 0);
    set_jumps(iter->u.backtracks, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iter->matchingpath);
    break;
    }

  if (iter->u.charpos.enabled)
    {
    /* Step backwards until the character before STR_PTR equals the
    recorded character, or the limit in slot 1 is reached. */
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    OP1(SLJIT_MOV, TMP2, 0, base, offset1);
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

    exhausted = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
    rescan = LABEL();
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    if (iter->u.charpos.othercasebit != 0)
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, iter->u.charpos.othercasebit);
    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, iter->u.charpos.chr, iter->matchingpath);
    skip_char_back(common);
    CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, rescan);
    }
  else
    {
    /* Give back one character and retry, unless the limit is reached. */
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    exhausted = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
    skip_char_back(common);
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, iter->matchingpath);
    }
  JUMPHERE(exhausted);
  release = 2;
  break;

  case OP_MINSTAR:
  /* Try to match one more character from the saved position. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  set_jumps(no_match, LABEL());
  release = 1;
  break;

  case OP_MINUPTO:
  /* As OP_MINSTAR, but the counter in slot 1 is decremented first and the
  iterator fails as soon as the decrement reaches zero. */
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP2(SLJIT_SUB | SLJIT_SET_E, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));

  OP1(SLJIT_MOV, base, offset1, TMP1, 0);
  compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);

  set_jumps(no_match, LABEL());
  release = 2;
  break;

  case OP_QUERY:
  /* Slot 0 is cleared after it is read, so the matching path is resumed at
  most once per entry point. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iter->matchingpath);
  exhausted = JUMP(SLJIT_JUMP);
  set_jumps(iter->u.backtracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  JUMPHERE(exhausted);
  release = 1;
  break;

  case OP_MINQUERY:
  /* A zero in slot 0 means the character was already tried. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  exhausted = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  set_jumps(no_match, LABEL());
  JUMPHERE(exhausted);
  release = 1;
  break;

  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSQUERY:
  case OP_POSUPTO:
  /* No opcode specific backtracking code is emitted for these. */
  break;

  default:
  SLJIT_ASSERT_STOP();
  break;
  }

/* Only stack based iterators have something to drop. */
if (on_stack && release > 0)
  free_stack(common, release);

set_jumps(current->topbacktracks, LABEL());
}
9778
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)9779 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9780 {
9781 DEFINE_COMPILER;
9782 PCRE2_SPTR cc = current->cc;
9783 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9784 PCRE2_UCHAR type;
9785
9786 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
9787
9788 if ((type & 0x1) == 0)
9789 {
9790 /* Maximize case. */
9791 set_jumps(current->topbacktracks, LABEL());
9792 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9793 free_stack(common, 1);
9794 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
9795 return;
9796 }
9797
9798 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9799 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
9800 set_jumps(current->topbacktracks, LABEL());
9801 free_stack(common, ref ? 2 : 3);
9802 }
9803
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)9804 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9805 {
9806 DEFINE_COMPILER;
9807
9808 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
9809 compile_backtrackingpath(common, current->top);
9810 set_jumps(current->topbacktracks, LABEL());
9811 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
9812 return;
9813
9814 if (common->has_set_som && common->mark_ptr != 0)
9815 {
9816 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9817 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9818 free_stack(common, 2);
9819 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP2, 0);
9820 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
9821 }
9822 else if (common->has_set_som || common->mark_ptr != 0)
9823 {
9824 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9825 free_stack(common, 1);
9826 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
9827 }
9828 }
9829
/* Generates the backtracking path of an assertion.

Arguments:
  common    the JIT compiler state
  current   backtrack record of the assertion (used as assert_backtrack)

The assertion may be preceded by OP_BRAZERO; OP_BRAMINZERO is not accepted
here. With OP_BRAZERO the top stack word is loaded into STR_PTR, and a
non-zero value sends control back to the matching path of the assertion. */

static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
assert_backtrack *assertion = CURRENT_AS(assert_backtrack);
PCRE2_SPTR cc = current->cc;
BOOL zero_allowed = FALSE;
struct sljit_jump *skipped = NULL;

SLJIT_ASSERT(*cc != OP_BRAMINZERO);
if (*cc == OP_BRAZERO)
  {
  zero_allowed = TRUE;
  cc++;
  SLJIT_ASSERT(current->topbacktracks == NULL);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

if (assertion->framesize < 0)
  {
  /* No frame to revert. */
  set_jumps(current->topbacktracks, LABEL());

  if (zero_allowed)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, assertion->matchingpath);
    free_stack(common, 1);
    }
  return;
  }

if (zero_allowed)
  {
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, assertion->matchingpath);
    free_stack(common, 1);
    return;
    }
  free_stack(common, 1);
  skipped = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
  {
  /* Positive assertions revert their frame and reload the private data
  slot from the word found at the frame size offset. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assertion->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assertion->private_data_ptr, SLJIT_MEM1(STACK_TOP), assertion->framesize * sizeof(sljit_sw));
  }
set_jumps(current->topbacktracks, LABEL());

if (zero_allowed)
  {
  /* We know there is enough place on the stack. */
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, assertion->matchingpath);
  JUMPHERE(skipped);
  }
}
9896
/* Generates the backtracking path of a bracket (group).

Arguments:
  common    the JIT compiler state
  current   backtrack record of the bracket (used as bracket_backtrack)

The function handles plain and capturing brackets, OP_ONCE / OP_ONCE_NC and
the conditional brackets OP_COND / OP_SCOND, optionally preceded by
OP_BRAZERO or OP_BRAMINZERO, and closed by OP_KET, OP_KETRMAX or OP_KETRMIN.
A plain OP_KET carrying private data describes a counted repeat
(OP_UPTO, OP_MINUPTO or OP_EXACT).

The emitted code is, in order:
  1. reload of the repeat counter and of the zero / minimum repeat state;
  2. restore of the capture data saved by the matching path;
  3. dispatch to the backtracking code of the alternative that matched;
  4. for every following alternative: its matching path, the code storing
     the bracket state again, and its own backtracking path;
  5. release of the remaining stack words of the bracket and the jump back
     to the iteration entry point.

The function returns early, leaving the code incomplete, when read only data
cannot be allocated or the sljit compiler reports an error. */

static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
int opcode, stacksize, alt_count, alt_max;
int offset = 0;
int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
PCRE2_SPTR cc = current->cc;
PCRE2_SPTR ccbegin;
PCRE2_SPTR ccprev;
PCRE2_UCHAR bra = OP_BRA;
PCRE2_UCHAR ket;
assert_backtrack *assert;
sljit_uw *next_update_addr = NULL;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
struct sljit_jump *brazero = NULL;
struct sljit_jump *alt1 = NULL;
struct sljit_jump *alt2 = NULL;
struct sljit_jump *once = NULL;
struct sljit_jump *cond = NULL;
struct sljit_label *rmin_label = NULL;
struct sljit_label *exact_label = NULL;

/* Step over a leading OP_BRAZERO / OP_BRAMINZERO and remember it in bra. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  }

opcode = *cc;
/* ccbegin temporarily points to the closing ket of the bracket. */
ccbegin = bracketend(cc) - 1 - LINK_SIZE;
ket = *ccbegin;
if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
  {
  /* Counted repeat: the private data of the ket holds the counter slot, the
  repeat type and the repeat count. Bounded greedy and non-greedy repeats
  are treated below as OP_KETRMAX and OP_KETRMIN. */
  repeat_ptr = PRIVATE_DATA(ccbegin);
  repeat_type = PRIVATE_DATA(ccbegin + 2);
  repeat_count = PRIVATE_DATA(ccbegin + 3);
  SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }
/* From here ccbegin is the bracket opcode and cc the end of its first
alternative. */
ccbegin = cc;
cc += GET(cc, 1);
has_alternatives = *cc == OP_ALT;
/* A conditional bracket counts as having alternatives when its condition is
an assertion or when jumps for a failed condition were recorded. */
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
/* For capturing brackets offset is twice the 2-unit value stored after the
link field; it indexes the pair of OVECTOR slots of the bracket. */
if (opcode == OP_CBRA || opcode == OP_SCBRA)
  offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
/* Opcode normalization: a repeated OP_COND is handled as OP_SCOND and
OP_ONCE_NC as OP_ONCE. */
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;
if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
  opcode = OP_ONCE;

alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;

/* Decoding the needs_control_head in framesize. */
if (opcode == OP_ONCE)
  {
  /* The lowest bit is the flag; the remaining bits are the frame size. Note
  that the stored framesize is modified in place. */
  needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
  CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
  }

/* Bounded repeat: pop the saved counter. For OP_UPTO the counter slot gets
the popped value plus one, otherwise the popped value itself. */
if (ket != OP_KET && repeat_type != 0)
  {
  /* TMP1 is used in OP_KETRMIN below. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  if (repeat_type == OP_UPTO)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  }

if (ket == OP_KETRMAX)
  {
  if (bra == OP_BRAZERO)
    {
    /* A zero word on the stack skips the bracket code; the jump is bound
    near the end of the function. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
    }
  }
else if (ket == OP_KETRMIN)
  {
  if (bra != OP_BRAMINZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    if (repeat_type != 0)
      {
      /* TMP1 was set a few lines above. */
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
      {
      /* Checking zero-length iteration. */
      if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      else
        {
        /* With a frame, the start position is read through the pointer
        kept in the private data slot. */
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
        }
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else
      JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
    }
  /* Target of the jump emitted in the OP_KETRMIN code at the end. */
  rmin_label = LABEL();
  if (repeat_type != 0)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  }
else if (repeat_type == OP_EXACT)
  {
  /* The counter restarts from one; exact_label is the loop target of the
  OP_EXACT code at the end of the function. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  exact_label = LABEL();
  }

/* Restore the capture data saved on the stack by the matching path. */
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    {
    /* Three words: the capture_last value and the two OVECTOR entries. */
    SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
    free_stack(common, 3);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
    }
  else if (common->optimized_cbracket[offset >> 1] == 0)
    {
    /* Two words: the two OVECTOR entries. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  }

/* Dispatch according to the bracket type. */
if (SLJIT_UNLIKELY(opcode == OP_ONCE))
  {
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    }
  /* Jumps over all the code emitted for the alternatives; it is bound in
  the OP_ONCE clean-up code below. */
  once = JUMP(SLJIT_JUMP);
  }
else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  if (has_alternatives)
    {
    /* Always exactly one alternative. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);

    alt_max = 2;
    alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
    }
  }
else if (has_alternatives)
  {
  /* TMP1 gets the saved index of the alternative that matched. It is stored
  as a multiple of sizeof(sljit_uw); see alt_count below. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);

  if (alt_max > 4)
    {
    /* Table jump if alt_max is greater than 4. */
    next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
    if (SLJIT_UNLIKELY(next_update_addr == NULL))
      return;
    sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
    add_label_addr(common, next_update_addr++);
    }
  else
    {
    /* Up to four alternatives are selected by a short compare chain which
    is continued after each alternative. */
    if (alt_max == 4)
      alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
    alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
    }
  }

/* Backtracking path of the items of the first alternative. */
COMPILE_BACKTRACKINGPATH(current->top);
if (current->topbacktracks)
  set_jumps(current->topbacktracks, LABEL());

if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  /* Conditional block always has at most one alternative. */
  if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
    {
    SLJIT_ASSERT(has_alternatives);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    /* A positive assertion with a frame is reverted here. */
    if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
      }
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
    }
  else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
    {
    SLJIT_ASSERT(has_alternatives);
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
    }
  else
    SLJIT_ASSERT(!has_alternatives);
  }

if (has_alternatives)
  {
  /* alt_count is the value stored for the alternative being compiled, in
  steps of sizeof(sljit_uw). */
  alt_count = sizeof(sljit_uw);
  do
    {
    /* The backtrack record is reused for every alternative. */
    current->top = NULL;
    current->topbacktracks = NULL;
    current->nextbacktracks = NULL;
    /* Conditional blocks always have an additional alternative, even if it is empty. */
    if (*cc == OP_ALT)
      {
      ccprev = cc + 1 + LINK_SIZE;
      cc += GET(cc, 1);
      /* Reload the subject position saved at the start of the bracket
      (conditional brackets do not reload it here). */
      if (opcode != OP_COND && opcode != OP_SCOND)
        {
        if (opcode != OP_ONCE)
          {
          if (private_data_ptr != 0)
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
          else
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
          }
        else
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
        }
      compile_matchingpath(common, ccprev, cc, current);
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
        return;
      }

    /* Instructions after the current alternative is successfully matched. */
    /* There is a similar code in compile_bracket_matchingpath. */
    if (opcode == OP_ONCE)
      match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

    /* First pass: count the stack words needed for the bracket state. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* We need to preserve the counter. TMP2 will be used below. */
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
      stacksize++;
      }
    if (ket != OP_KET || bra != OP_BRA)
      stacksize++;
    if (offset != 0)
      {
      if (common->capture_last_ptr != 0)
        stacksize++;
      if (common->optimized_cbracket[offset >> 1] == 0)
        stacksize += 2;
      }
    if (opcode != OP_ONCE)
      stacksize++;

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    /* Second pass: fill the allocated words in the same order. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* TMP2 was set above. */
      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
      stacksize++;
      }

    if (ket != OP_KET || bra != OP_BRA)
      {
      /* Repeated brackets store the current position, the others zero. */
      if (ket != OP_KET)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      else
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
      stacksize++;
      }

    if (offset != 0)
      stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

    /* Record which alternative matched for the dispatch code above. */
    if (opcode != OP_ONCE)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);

    if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
      {
      /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
      SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      }

    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);

    /* Entry point of the backtracking code of this alternative: either the
    next jump table entry or the continuation of the compare chain. */
    if (opcode != OP_ONCE)
      {
      if (alt_max > 4)
        add_label_addr(common, next_update_addr++);
      else
        {
        if (alt_count != 2 * sizeof(sljit_uw))
          {
          JUMPHERE(alt1);
          if (alt_max == 3 && alt_count == sizeof(sljit_uw))
            alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
          }
        else
          {
          JUMPHERE(alt2);
          if (alt_max == 4)
            alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
          }
        }
      alt_count += sizeof(sljit_uw);
      }

    COMPILE_BACKTRACKINGPATH(current->top);
    if (current->topbacktracks)
      set_jumps(current->topbacktracks, LABEL());
    SLJIT_ASSERT(!current->nextbacktracks);
    }
  while (*cc == OP_ALT);

  if (cond != NULL)
    {
    SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    /* A negative assertion with a frame is reverted here. */
    if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
      }
    JUMPHERE(cond);
    }

  /* Free the STR_PTR. */
  if (private_data_ptr == 0)
    free_stack(common, 1);
  }

/* Every alternative has failed: restore the values saved when the bracket
was entered. */
if (offset != 0)
  {
  /* Using both tmp register is better for instruction scheduling. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }
else if (opcode == OP_SBRA || opcode == OP_SCOND)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  }
else if (opcode == OP_ONCE)
  {
  cc = ccbegin + GET(ccbegin, 1);
  /* Count the words to release: the control head (if any), then either the
  frame with its extra words or the saved STR_PTR. */
  stacksize = needs_control_head ? 1 : 0;

  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    /* Reset head and drop saved frame. */
    stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
    }
  else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
    {
    /* The STR_PTR must be released. */
    stacksize++;
    }

  if (stacksize > 0)
    free_stack(common, stacksize);

  /* The jump emitted before the alternatives lands here, after the release
  above. */
  JUMPHERE(once);
  /* Restore previous private_data_ptr */
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_sw));
  else if (ket == OP_KETRMIN)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    /* See the comment below. */
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }

/* Finally decide whether another iteration has to be tried. */
if (repeat_type == OP_EXACT)
  {
  /* Increase the counter and loop back while it does not exceed
  repeat_count. */
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
  }
else if (ket == OP_KETRMAX)
  {
  /* A non-zero saved position continues with the previous iteration. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (bra != OP_BRAZERO)
    free_stack(common, 1);

  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
  if (bra == OP_BRAZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
    JUMPHERE(brazero);
    free_stack(common, 1);
    }
  }
else if (ket == OP_KETRMIN)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  /* OP_ONCE removes everything in case of a backtrack, so we don't
  need to explicitly release the STR_PTR. The extra release would
  affect badly the free_stack(2) above. */
  if (opcode != OP_ONCE)
    free_stack(common, 1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
  if (opcode == OP_ONCE)
    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
  else if (bra == OP_BRAMINZERO)
    free_stack(common, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
  JUMPHERE(brazero);
  }
}
10358
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)10359 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10360 {
10361 DEFINE_COMPILER;
10362 int offset;
10363 struct sljit_jump *jump;
10364
10365 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
10366 {
10367 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
10368 {
10369 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
10370 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10371 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10372 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10373 if (common->capture_last_ptr != 0)
10374 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
10375 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
10376 if (common->capture_last_ptr != 0)
10377 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
10378 }
10379 set_jumps(current->topbacktracks, LABEL());
10380 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
10381 return;
10382 }
10383
10384 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
10385 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10386
10387 if (current->topbacktracks)
10388 {
10389 jump = JUMP(SLJIT_JUMP);
10390 set_jumps(current->topbacktracks, LABEL());
10391 /* Drop the stack frame. */
10392 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
10393 JUMPHERE(jump);
10394 }
10395 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_sw));
10396 }
10397
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)10398 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10399 {
10400 assert_backtrack backtrack;
10401
10402 current->top = NULL;
10403 current->topbacktracks = NULL;
10404 current->nextbacktracks = NULL;
10405 if (current->cc[1] > OP_ASSERTBACK_NOT)
10406 {
10407 /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
10408 compile_bracket_matchingpath(common, current->cc, current);
10409 compile_bracket_backtrackingpath(common, current->top);
10410 }
10411 else
10412 {
10413 memset(&backtrack, 0, sizeof(backtrack));
10414 backtrack.common.cc = current->cc;
10415 backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
10416 /* Manual call of compile_assert_matchingpath. */
10417 compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
10418 }
10419 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
10420 }
10421
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)10422 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10423 {
10424 DEFINE_COMPILER;
10425 PCRE2_UCHAR opcode = *current->cc;
10426 struct sljit_label *loop;
10427 struct sljit_jump *jump;
10428
10429 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
10430 {
10431 if (common->then_trap != NULL)
10432 {
10433 SLJIT_ASSERT(common->control_head_ptr != 0);
10434
10435 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10436 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
10437 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
10438 jump = JUMP(SLJIT_JUMP);
10439
10440 loop = LABEL();
10441 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), -(int)sizeof(sljit_sw));
10442 JUMPHERE(jump);
10443 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(2 * sizeof(sljit_sw)), TMP1, 0, loop);
10444 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(3 * sizeof(sljit_sw)), TMP2, 0, loop);
10445 add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
10446 return;
10447 }
10448 else if (common->positive_assert)
10449 {
10450 add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
10451 return;
10452 }
10453 }
10454
10455 if (common->local_exit)
10456 {
10457 if (common->quit_label == NULL)
10458 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
10459 else
10460 JUMPTO(SLJIT_JUMP, common->quit_label);
10461 return;
10462 }
10463
10464 if (opcode == OP_SKIP_ARG)
10465 {
10466 SLJIT_ASSERT(common->control_head_ptr != 0);
10467 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10468 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
10469 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
10470 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
10471 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
10472
10473 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
10474 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, -1));
10475 return;
10476 }
10477
10478 if (opcode == OP_SKIP)
10479 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10480 else
10481 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
10482 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
10483 }
10484
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)10485 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10486 {
10487 DEFINE_COMPILER;
10488 struct sljit_jump *jump;
10489 int size;
10490
10491 if (CURRENT_AS(then_trap_backtrack)->then_trap)
10492 {
10493 common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
10494 return;
10495 }
10496
10497 size = CURRENT_AS(then_trap_backtrack)->framesize;
10498 size = 3 + (size < 0 ? 0 : size);
10499
10500 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
10501 free_stack(common, size);
10502 jump = JUMP(SLJIT_JUMP);
10503
10504 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
10505 /* STACK_TOP is set by THEN. */
10506 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
10507 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10508 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10509 free_stack(common, 3);
10510
10511 JUMPHERE(jump);
10512 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10513 }
10514
/* Emits the backtracking code for a chain of backtrack records.

The chain is walked from 'current' through the 'prev' links. For each record,
any pending nextbacktracks jumps are bound to a fresh label, and then the code
for the record's opcode (*current->cc) is emitted, either inline or by one of
the specialised helpers. common->then_trap is saved on entry and put back on
exit, since the then-trap helper may overwrite it. */

static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *saved_then_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->nextbacktracks != NULL)
    set_jumps(current->nextbacktracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Pop the saved value and write it back to OVECTOR(0). */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Single character, negated character, character type and class
    iterators all share one helper. */
    case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
    case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
    case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
    case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
    case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
    case OP_CLASS: case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF: case OP_REFI: case OP_DNREF: case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT: case OP_ASSERT_NOT:
    case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ONCE: case OP_ONCE_NC:
    case OP_BRA: case OP_CBRA: case OP_COND:
    case OP_SBRA: case OP_SCBRA: case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode that follows decides whether this wraps an assertion
    or a bracket. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS: case OP_CBRAPOS:
    case OP_SBRAPOS: case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    if (common->has_skip_arg)
      {
      /* Five slots are released; slot 4 goes back to the mark pointer
      and slot 0 goes back to the control head. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      /* A single slot holds the value written back to the mark pointer. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN: case OP_THEN_ARG:
    case OP_PRUNE: case OP_PRUNE_ARG:
    case OP_SKIP: case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT:
    /* The no-match return value is loaded only outside a local exit. */
    if (!common->local_exit)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
    /* Jump straight to the quit label if it already exists, otherwise
    queue the jump on the quit list. */
    if (common->quit_label != NULL)
      JUMPTO(SLJIT_JUMP, common->quit_label);
    else
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    break;

    case OP_CALLOUT: case OP_CALLOUT_STR:
    case OP_FAIL:
    case OP_ACCEPT: case OP_ASSERT_ACCEPT:
    set_jumps(current->topbacktracks, LABEL());
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_ASSERT_STOP();
    break;
    }
  }

common->then_trap = saved_then_trap;
}
10702
compile_recurse(compiler_common * common)10703 static SLJIT_INLINE void compile_recurse(compiler_common *common)
10704 {
10705 DEFINE_COMPILER;
10706 PCRE2_SPTR cc = common->start + common->currententry->start;
10707 PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
10708 PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
10709 BOOL needs_control_head;
10710 int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
10711 int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
10712 int alternativesize;
10713 BOOL needs_frame;
10714 backtrack_common altbacktrack;
10715 struct sljit_jump *jump;
10716
10717 /* Recurse captures then. */
10718 common->then_trap = NULL;
10719
10720 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
10721 needs_frame = framesize >= 0;
10722 if (!needs_frame)
10723 framesize = 0;
10724 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
10725
10726 SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
10727 common->currententry->entry = LABEL();
10728 set_jumps(common->currententry->calls, common->currententry->entry);
10729
10730 sljit_emit_fast_enter(compiler, TMP2, 0);
10731 count_match(common);
10732 allocate_stack(common, private_data_size + framesize + alternativesize);
10733 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
10734 copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
10735 if (needs_control_head)
10736 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
10737 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
10738 if (needs_frame)
10739 init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);
10740
10741 if (alternativesize > 0)
10742 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10743
10744 memset(&altbacktrack, 0, sizeof(backtrack_common));
10745 common->quit_label = NULL;
10746 common->accept_label = NULL;
10747 common->quit = NULL;
10748 common->accept = NULL;
10749 altbacktrack.cc = ccbegin;
10750 cc += GET(cc, 1);
10751 while (1)
10752 {
10753 altbacktrack.top = NULL;
10754 altbacktrack.topbacktracks = NULL;
10755
10756 if (altbacktrack.cc != ccbegin)
10757 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10758
10759 compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
10760 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10761 return;
10762
10763 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
10764
10765 compile_backtrackingpath(common, altbacktrack.top);
10766 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10767 return;
10768 set_jumps(altbacktrack.topbacktracks, LABEL());
10769
10770 if (*cc != OP_ALT)
10771 break;
10772
10773 altbacktrack.cc = cc + 1 + LINK_SIZE;
10774 cc += GET(cc, 1);
10775 }
10776
10777 /* None of them matched. */
10778 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
10779 jump = JUMP(SLJIT_JUMP);
10780
10781 if (common->quit != NULL)
10782 {
10783 set_jumps(common->quit, LABEL());
10784 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
10785 if (needs_frame)
10786 {
10787 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10788 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10789 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10790 }
10791 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
10792 common->quit = NULL;
10793 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
10794 }
10795
10796 set_jumps(common->accept, LABEL());
10797 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
10798 if (needs_frame)
10799 {
10800 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10801 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10802 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10803 }
10804 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
10805
10806 JUMPHERE(jump);
10807 if (common->quit != NULL)
10808 set_jumps(common->quit, LABEL());
10809 copy_private_data(common, ccbegin, ccend, FALSE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
10810 free_stack(common, private_data_size + framesize + alternativesize);
10811 if (needs_control_head)
10812 {
10813 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 2 * sizeof(sljit_sw));
10814 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
10815 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP1, 0);
10816 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
10817 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
10818 }
10819 else
10820 {
10821 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
10822 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
10823 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP2, 0);
10824 }
10825 sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
10826 }
10827
10828 #undef COMPILE_BACKTRACKINGPATH
10829 #undef CURRENT_AS
10830
jit_compile(pcre2_code * code,sljit_u32 mode)10831 static int jit_compile(pcre2_code *code, sljit_u32 mode)
10832 {
10833 pcre2_real_code *re = (pcre2_real_code *)code;
10834 struct sljit_compiler *compiler;
10835 backtrack_common rootbacktrack;
10836 compiler_common common_data;
10837 compiler_common *common = &common_data;
10838 const sljit_u8 *tables = re->tables;
10839 void *allocator_data = &re->memctl;
10840 int private_data_size;
10841 PCRE2_SPTR ccend;
10842 executable_functions *functions;
10843 void *executable_func;
10844 sljit_uw executable_size;
10845 sljit_uw total_length;
10846 label_addr_list *label_addr;
10847 struct sljit_label *mainloop_label = NULL;
10848 struct sljit_label *continue_match_label;
10849 struct sljit_label *empty_match_found_label = NULL;
10850 struct sljit_label *empty_match_backtrack_label = NULL;
10851 struct sljit_label *reset_match_label;
10852 struct sljit_label *quit_label;
10853 struct sljit_jump *jump;
10854 struct sljit_jump *minlength_check_failed = NULL;
10855 struct sljit_jump *reqbyte_notfound = NULL;
10856 struct sljit_jump *empty_match = NULL;
10857
10858 SLJIT_ASSERT(tables);
10859
10860 memset(&rootbacktrack, 0, sizeof(backtrack_common));
10861 memset(common, 0, sizeof(compiler_common));
10862 common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
10863 rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
10864
10865 common->start = rootbacktrack.cc;
10866 common->read_only_data_head = NULL;
10867 common->fcc = tables + fcc_offset;
10868 common->lcc = (sljit_sw)(tables + lcc_offset);
10869 common->mode = mode;
10870 common->might_be_empty = re->minlength == 0;
10871 common->nltype = NLTYPE_FIXED;
10872 switch(re->newline_convention)
10873 {
10874 case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
10875 case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
10876 case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
10877 case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
10878 case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
10879 default: return PCRE2_ERROR_INTERNAL;
10880 }
10881 common->nlmax = READ_CHAR_MAX;
10882 common->nlmin = 0;
10883 if (re->bsr_convention == PCRE2_BSR_UNICODE)
10884 common->bsr_nltype = NLTYPE_ANY;
10885 else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
10886 common->bsr_nltype = NLTYPE_ANYCRLF;
10887 else
10888 {
10889 #ifdef BSR_ANYCRLF
10890 common->bsr_nltype = NLTYPE_ANYCRLF;
10891 #else
10892 common->bsr_nltype = NLTYPE_ANY;
10893 #endif
10894 }
10895 common->bsr_nlmax = READ_CHAR_MAX;
10896 common->bsr_nlmin = 0;
10897 common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
10898 common->ctypes = (sljit_sw)(tables + ctypes_offset);
10899 common->name_count = re->name_count;
10900 common->name_entry_size = re->name_entry_size;
10901 common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
10902 common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
10903 #ifdef SUPPORT_UNICODE
10904 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
10905 common->utf = (re->overall_options & PCRE2_UTF) != 0;
10906 common->use_ucp = (re->overall_options & PCRE2_UCP) != 0;
10907 if (common->utf)
10908 {
10909 if (common->nltype == NLTYPE_ANY)
10910 common->nlmax = 0x2029;
10911 else if (common->nltype == NLTYPE_ANYCRLF)
10912 common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
10913 else
10914 {
10915 /* We only care about the first newline character. */
10916 common->nlmax = common->newline & 0xff;
10917 }
10918
10919 if (common->nltype == NLTYPE_FIXED)
10920 common->nlmin = common->newline & 0xff;
10921 else
10922 common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
10923
10924 if (common->bsr_nltype == NLTYPE_ANY)
10925 common->bsr_nlmax = 0x2029;
10926 else
10927 common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
10928 common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
10929 }
10930 #endif /* SUPPORT_UNICODE */
10931 ccend = bracketend(common->start);
10932
10933 /* Calculate the local space size on the stack. */
10934 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
10935 common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
10936 if (!common->optimized_cbracket)
10937 return PCRE2_ERROR_NOMEMORY;
10938 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
10939 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
10940 #else
10941 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
10942 #endif
10943
10944 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
10945 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
10946 common->capture_last_ptr = common->ovector_start;
10947 common->ovector_start += sizeof(sljit_sw);
10948 #endif
10949 if (!check_opcode_types(common, common->start, ccend))
10950 {
10951 SLJIT_FREE(common->optimized_cbracket, allocator_data);
10952 return PCRE2_ERROR_NOMEMORY;
10953 }
10954
10955 /* Checking flags and updating ovector_start. */
10956 if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
10957 {
10958 common->req_char_ptr = common->ovector_start;
10959 common->ovector_start += sizeof(sljit_sw);
10960 }
10961 if (mode != PCRE2_JIT_COMPLETE)
10962 {
10963 common->start_used_ptr = common->ovector_start;
10964 common->ovector_start += sizeof(sljit_sw);
10965 if (mode == PCRE2_JIT_PARTIAL_SOFT)
10966 {
10967 common->hit_start = common->ovector_start;
10968 common->ovector_start += sizeof(sljit_sw);
10969 }
10970 }
10971 if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
10972 {
10973 common->match_end_ptr = common->ovector_start;
10974 common->ovector_start += sizeof(sljit_sw);
10975 }
10976 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
10977 common->control_head_ptr = 1;
10978 #endif
10979 if (common->control_head_ptr != 0)
10980 {
10981 common->control_head_ptr = common->ovector_start;
10982 common->ovector_start += sizeof(sljit_sw);
10983 }
10984 if (common->has_set_som)
10985 {
10986 /* Saving the real start pointer is necessary. */
10987 common->start_ptr = common->ovector_start;
10988 common->ovector_start += sizeof(sljit_sw);
10989 }
10990
10991 /* Aligning ovector to even number of sljit words. */
10992 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
10993 common->ovector_start += sizeof(sljit_sw);
10994
10995 if (common->start_ptr == 0)
10996 common->start_ptr = OVECTOR(0);
10997
10998 /* Capturing brackets cannot be optimized if callouts are allowed. */
10999 if (common->capture_last_ptr != 0)
11000 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
11001
11002 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
11003 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
11004
11005 total_length = ccend - common->start;
11006 common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
11007 if (!common->private_data_ptrs)
11008 {
11009 SLJIT_FREE(common->optimized_cbracket, allocator_data);
11010 return PCRE2_ERROR_NOMEMORY;
11011 }
11012 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
11013
11014 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
11015 set_private_data_ptrs(common, &private_data_size, ccend);
11016 if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
11017 {
11018 if (!detect_fast_forward_skip(common, &private_data_size) && !common->has_skip_in_assert_back)
11019 detect_fast_fail(common, common->start, &private_data_size, 4);
11020 }
11021
11022 SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr);
11023
11024 if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
11025 {
11026 SLJIT_FREE(common->private_data_ptrs, allocator_data);
11027 SLJIT_FREE(common->optimized_cbracket, allocator_data);
11028 return PCRE2_ERROR_NOMEMORY;
11029 }
11030
11031 if (common->has_then)
11032 {
11033 common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
11034 memset(common->then_offsets, 0, total_length);
11035 set_then_offsets(common, common->start, NULL);
11036 }
11037
11038 compiler = sljit_create_compiler(allocator_data);
11039 if (!compiler)
11040 {
11041 SLJIT_FREE(common->optimized_cbracket, allocator_data);
11042 SLJIT_FREE(common->private_data_ptrs, allocator_data);
11043 return PCRE2_ERROR_NOMEMORY;
11044 }
11045 common->compiler = compiler;
11046
11047 /* Main pcre_jit_exec entry. */
11048 sljit_emit_enter(compiler, 0, 1, 5, 5, 0, 0, private_data_size);
11049
11050 /* Register init. */
11051 reset_ovector(common, (re->top_bracket + 1) * 2);
11052 if (common->req_char_ptr != 0)
11053 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
11054
11055 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
11056 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
11057 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
11058 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
11059 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
11060 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
11061 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
11062 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
11063 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11064 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
11065
11066 if (common->fast_fail_start_ptr < common->fast_fail_end_ptr)
11067 reset_fast_fail(common);
11068
11069 if (mode == PCRE2_JIT_PARTIAL_SOFT)
11070 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
11071 if (common->mark_ptr != 0)
11072 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
11073 if (common->control_head_ptr != 0)
11074 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
11075
11076 /* Main part of the matching */
11077 if ((re->overall_options & PCRE2_ANCHORED) == 0)
11078 {
11079 mainloop_label = mainloop_entry(common, (re->flags & PCRE2_HASCRORLF) != 0, re->overall_options);
11080 continue_match_label = LABEL();
11081 /* Forward search if possible. */
11082 if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
11083 {
11084 if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
11085 ;
11086 else if ((re->flags & PCRE2_FIRSTSET) != 0)
11087 fast_forward_first_char(common, (PCRE2_UCHAR)(re->first_codeunit), (re->flags & PCRE2_FIRSTCASELESS) != 0);
11088 else if ((re->flags & PCRE2_STARTLINE) != 0)
11089 fast_forward_newline(common);
11090 else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
11091 fast_forward_start_bits(common, re->start_bitmap);
11092 }
11093 }
11094 else
11095 continue_match_label = LABEL();
11096
11097 if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
11098 {
11099 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
11100 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
11101 minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
11102 }
11103 if (common->req_char_ptr != 0)
11104 reqbyte_notfound = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
11105
11106 /* Store the current STR_PTR in OVECTOR(0). */
11107 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
11108 /* Copy the limit of allowed recursions. */
11109 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
11110 if (common->capture_last_ptr != 0)
11111 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
11112 if (common->fast_forward_bc_ptr != NULL)
11113 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1), STR_PTR, 0);
11114
11115 if (common->start_ptr != OVECTOR(0))
11116 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
11117
11118 /* Copy the beginning of the string. */
11119 if (mode == PCRE2_JIT_PARTIAL_SOFT)
11120 {
11121 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
11122 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
11123 JUMPHERE(jump);
11124 }
11125 else if (mode == PCRE2_JIT_PARTIAL_HARD)
11126 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
11127
11128 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
11129 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11130 {
11131 sljit_free_compiler(compiler);
11132 SLJIT_FREE(common->optimized_cbracket, allocator_data);
11133 SLJIT_FREE(common->private_data_ptrs, allocator_data);
11134 PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);
11135 return PCRE2_ERROR_NOMEMORY;
11136 }
11137
11138 if (common->might_be_empty)
11139 {
11140 empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
11141 empty_match_found_label = LABEL();
11142 }
11143
11144 common->accept_label = LABEL();
11145 if (common->accept != NULL)
11146 set_jumps(common->accept, common->accept_label);
11147
11148 /* This means we have a match. Update the ovector. */
11149 copy_ovector(common, re->top_bracket + 1);
11150 common->quit_label = common->forced_quit_label = LABEL();
11151 if (common->quit != NULL)
11152 set_jumps(common->quit, common->quit_label);
11153 if (common->forced_quit != NULL)
11154 set_jumps(common->forced_quit, common->forced_quit_label);
11155 if (minlength_check_failed != NULL)
11156 SET_LABEL(minlength_check_failed, common->forced_quit_label);
11157 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
11158
11159 if (mode != PCRE2_JIT_COMPLETE)
11160 {
11161 common->partialmatchlabel = LABEL();
11162 set_jumps(common->partialmatch, common->partialmatchlabel);
11163 return_with_partial_match(common, common->quit_label);
11164 }
11165
11166 if (common->might_be_empty)
11167 empty_match_backtrack_label = LABEL();
11168 compile_backtrackingpath(common, rootbacktrack.top);
11169 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11170 {
11171 sljit_free_compiler(compiler);
11172 SLJIT_FREE(common->optimized_cbracket, allocator_data);
11173 SLJIT_FREE(common->private_data_ptrs, allocator_data);
11174 PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);
11175 return PCRE2_ERROR_NOMEMORY;
11176 }
11177
11178 SLJIT_ASSERT(rootbacktrack.prev == NULL);
11179 reset_match_label = LABEL();
11180
11181 if (mode == PCRE2_JIT_PARTIAL_SOFT)
11182 {
11183 /* Update hit_start only in the first time. */
11184 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
11185 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
11186 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
11187 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
11188 JUMPHERE(jump);
11189 }
11190
11191 /* Check we have remaining characters. */
11192 if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
11193 {
11194 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
11195 }
11196
11197 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
11198 (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1)) : common->start_ptr);
11199
11200 if ((re->overall_options & PCRE2_ANCHORED) == 0)
11201 {
11202 if (common->ff_newline_shortcut != NULL)
11203 {
11204 /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
11205 if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
11206 {
11207 if (common->match_end_ptr != 0)
11208 {
11209 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
11210 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
11211 CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
11212 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
11213 }
11214 else
11215 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
11216 }
11217 }
11218 else
11219 CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
11220 }
11221
11222 /* No more remaining characters. */
11223 if (reqbyte_notfound != NULL)
11224 JUMPHERE(reqbyte_notfound);
11225
11226 if (mode == PCRE2_JIT_PARTIAL_SOFT)
11227 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
11228
11229 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
11230 JUMPTO(SLJIT_JUMP, common->quit_label);
11231
11232 flush_stubs(common);
11233
11234 if (common->might_be_empty)
11235 {
11236 JUMPHERE(empty_match);
11237 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11238 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
11239 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
11240 JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
11241 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
11242 JUMPTO(SLJIT_ZERO, empty_match_found_label);
11243 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
11244 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
11245 JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
11246 }
11247
11248 common->fast_forward_bc_ptr = NULL;
11249 common->fast_fail_start_ptr = 0;
11250 common->fast_fail_end_ptr = 0;
11251 common->currententry = common->entries;
11252 common->local_exit = TRUE;
11253 quit_label = common->quit_label;
11254 while (common->currententry != NULL)
11255 {
11256 /* Might add new entries. */
11257 compile_recurse(common);
11258 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11259 {
11260 sljit_free_compiler(compiler);
11261 SLJIT_FREE(common->optimized_cbracket, allocator_data);
11262 SLJIT_FREE(common->private_data_ptrs, allocator_data);
11263 PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);
11264 return PCRE2_ERROR_NOMEMORY;
11265 }
11266 flush_stubs(common);
11267 common->currententry = common->currententry->next;
11268 }
11269 common->local_exit = FALSE;
11270 common->quit_label = quit_label;
11271
11272 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
11273 /* This is a (really) rare case. */
11274 set_jumps(common->stackalloc, LABEL());
11275 /* RETURN_ADDR is not a saved register. */
11276 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
11277 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
11278 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11279 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
11280 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
11281 OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
11282
11283 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
11284 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
11285 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11286 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
11287 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top));
11288 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit));
11289 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
11290 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
11291
11292 /* Allocation failed. */
11293 JUMPHERE(jump);
11294 /* We break the return address cache here, but this is a really rare case. */
11295 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
11296 JUMPTO(SLJIT_JUMP, common->quit_label);
11297
11298 /* Call limit reached. */
11299 set_jumps(common->calllimit, LABEL());
11300 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
11301 JUMPTO(SLJIT_JUMP, common->quit_label);
11302
11303 if (common->revertframes != NULL)
11304 {
11305 set_jumps(common->revertframes, LABEL());
11306 do_revertframes(common);
11307 }
11308 if (common->wordboundary != NULL)
11309 {
11310 set_jumps(common->wordboundary, LABEL());
11311 check_wordboundary(common);
11312 }
11313 if (common->anynewline != NULL)
11314 {
11315 set_jumps(common->anynewline, LABEL());
11316 check_anynewline(common);
11317 }
11318 if (common->hspace != NULL)
11319 {
11320 set_jumps(common->hspace, LABEL());
11321 check_hspace(common);
11322 }
11323 if (common->vspace != NULL)
11324 {
11325 set_jumps(common->vspace, LABEL());
11326 check_vspace(common);
11327 }
11328 if (common->casefulcmp != NULL)
11329 {
11330 set_jumps(common->casefulcmp, LABEL());
11331 do_casefulcmp(common);
11332 }
11333 if (common->caselesscmp != NULL)
11334 {
11335 set_jumps(common->caselesscmp, LABEL());
11336 do_caselesscmp(common);
11337 }
11338 if (common->reset_match != NULL)
11339 {
11340 set_jumps(common->reset_match, LABEL());
11341 do_reset_match(common, (re->top_bracket + 1) * 2);
11342 CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
11343 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
11344 JUMPTO(SLJIT_JUMP, reset_match_label);
11345 }
11346 #ifdef SUPPORT_UNICODE
11347 #if PCRE2_CODE_UNIT_WIDTH == 8
11348 if (common->utfreadchar != NULL)
11349 {
11350 set_jumps(common->utfreadchar, LABEL());
11351 do_utfreadchar(common);
11352 }
11353 if (common->utfreadchar16 != NULL)
11354 {
11355 set_jumps(common->utfreadchar16, LABEL());
11356 do_utfreadchar16(common);
11357 }
11358 if (common->utfreadtype8 != NULL)
11359 {
11360 set_jumps(common->utfreadtype8, LABEL());
11361 do_utfreadtype8(common);
11362 }
11363 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
11364 if (common->getucd != NULL)
11365 {
11366 set_jumps(common->getucd, LABEL());
11367 do_getucd(common);
11368 }
11369 #endif /* SUPPORT_UNICODE */
11370
11371 SLJIT_FREE(common->optimized_cbracket, allocator_data);
11372 SLJIT_FREE(common->private_data_ptrs, allocator_data);
11373
11374 executable_func = sljit_generate_code(compiler);
11375 executable_size = sljit_get_generated_code_size(compiler);
11376 label_addr = common->label_addrs;
11377 while (label_addr != NULL)
11378 {
11379 *label_addr->update_addr = sljit_get_label_addr(label_addr->label);
11380 label_addr = label_addr->next;
11381 }
11382 sljit_free_compiler(compiler);
11383 if (executable_func == NULL)
11384 {
11385 PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);
11386 return PCRE2_ERROR_NOMEMORY;
11387 }
11388
11389 /* Reuse the function descriptor if possible. */
11390 if (re->executable_jit != NULL)
11391 functions = (executable_functions *)re->executable_jit;
11392 else
11393 {
11394 functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
11395 if (functions == NULL)
11396 {
11397 /* This case is highly unlikely since we just recently
11398 freed a lot of memory. Not impossible though. */
11399 sljit_free_code(executable_func);
11400 PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);
11401 return PCRE2_ERROR_NOMEMORY;
11402 }
11403 memset(functions, 0, sizeof(executable_functions));
11404 functions->top_bracket = re->top_bracket + 1;
11405 functions->limit_match = re->limit_match;
11406 re->executable_jit = functions;
11407 }
11408
11409 /* Turn mode into an index. */
11410 if (mode == PCRE2_JIT_COMPLETE)
11411 mode = 0;
11412 else
11413 mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
11414
11415 SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
11416 functions->executable_funcs[mode] = executable_func;
11417 functions->read_only_data_heads[mode] = common->read_only_data_head;
11418 functions->executable_sizes[mode] = executable_size;
11419 return 0;
11420 }
11421
11422 #endif
11423
11424 /*************************************************
11425 * JIT compile a Regular Expression *
11426 *************************************************/
11427
/* This function uses JIT to convert a previously-compiled pattern into machine
code.
11430
11431 Arguments:
11432 code a compiled pattern
11433 options JIT option bits
11434
11435 Returns: 0: success or (*NOJIT) was used
11436 <0: an error code
11437 */
11438
/* Mask of all the option bits that pcre2_jit_compile() accepts. A request
containing any bit outside this mask is rejected with
PCRE2_ERROR_JIT_BADOPTION. */

#define PUBLIC_JIT_COMPILE_OPTIONS \
  (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD)
11441
11442 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_compile(pcre2_code * code,uint32_t options)11443 pcre2_jit_compile(pcre2_code *code, uint32_t options)
11444 {
11445 #ifndef SUPPORT_JIT
11446
11447 (void)code;
11448 (void)options;
11449 return PCRE2_ERROR_JIT_BADOPTION;
11450
11451 #else /* SUPPORT_JIT */
11452
11453 pcre2_real_code *re = (pcre2_real_code *)code;
11454 executable_functions *functions;
11455 int result;
11456
11457 if (code == NULL)
11458 return PCRE2_ERROR_NULL;
11459
11460 if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
11461 return PCRE2_ERROR_JIT_BADOPTION;
11462
11463 if ((re->flags & PCRE2_NOJIT) != 0) return 0;
11464
11465 functions = (executable_functions *)re->executable_jit;
11466
11467 if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
11468 || functions->executable_funcs[0] == NULL)) {
11469 result = jit_compile(code, PCRE2_JIT_COMPLETE);
11470 if (result != 0)
11471 return result;
11472 }
11473
11474 if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
11475 || functions->executable_funcs[1] == NULL)) {
11476 result = jit_compile(code, PCRE2_JIT_PARTIAL_SOFT);
11477 if (result != 0)
11478 return result;
11479 }
11480
11481 if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
11482 || functions->executable_funcs[2] == NULL)) {
11483 result = jit_compile(code, PCRE2_JIT_PARTIAL_HARD);
11484 if (result != 0)
11485 return result;
11486 }
11487
11488 return 0;
11489
11490 #endif /* SUPPORT_JIT */
11491 }
11492
/* The JIT compiler uses an all-in-one approach. This improves security,
since the code generator functions are not exported. */
11495
11496 #define INCLUDED_FROM_PCRE2_JIT_COMPILE
11497
11498 #include "pcre2_jit_match.c"
11499 #include "pcre2_jit_misc.c"
11500
11501 /* End of pcre2_jit_compile.c */
11502