1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Original API code Copyright (c) 1997-2012 University of Cambridge
10 New API code Copyright (c) 2016-2018 University of Cambridge
11
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40
41 #ifdef HAVE_CONFIG_H
42 #include "config.h"
43 #endif
44
45 #include "pcre2_internal.h"
46
47 #ifdef SUPPORT_JIT
48
49 /* All-in-one: Since we use the JIT compiler only from here,
50 we just include it. This way we don't need to touch the build
51 system files. */
52
/* Configuration switches that must be defined before the sljit sources are
included further down in this file. */
#define SLJIT_CONFIG_AUTO 1
#define SLJIT_CONFIG_STATIC 1
#define SLJIT_VERBOSE 0

/* SLJIT_DEBUG follows PCRE2_DEBUG: 1 when it is defined, 0 otherwise. */
#ifdef PCRE2_DEBUG
#define SLJIT_DEBUG 1
#else
#define SLJIT_DEBUG 0
#endif

/* Memory hooks: both map onto the static wrappers defined right below, which
forward to the callbacks of a pcre2_memctl block. */
#define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
#define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
65
pcre2_jit_malloc(size_t size,void * allocator_data)66 static void * pcre2_jit_malloc(size_t size, void *allocator_data)
67 {
68 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
69 return allocator->malloc(size, allocator->memory_data);
70 }
71
pcre2_jit_free(void * ptr,void * allocator_data)72 static void pcre2_jit_free(void *ptr, void *allocator_data)
73 {
74 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
75 allocator->free(ptr, allocator->memory_data);
76 }
77
78 #include "sljit/sljitLir.c"
79
80 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
81 #error Unsupported architecture
82 #endif
83
84 /* Defines for debugging purposes. */
85
86 /* 1 - Use unoptimized capturing brackets.
87 2 - Enable capture_last_ptr (includes option 1). */
88 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
89
90 /* 1 - Always have a control head. */
91 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
92
/* Allocate memory for the regex stack on the real machine stack.
Fast, but limited size. The value is a byte count (32 KiB). */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE 8192

/* Enable to check that the allocation could destroy temporaries.
Only defined when SLJIT_DEBUG is defined and non-zero. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
105
106 /*
Short summary of the backtracking mechanism employed by the JIT code generator:
108
The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
114
115 'ab' - 'a' and 'b' regexps are concatenated
116 'a+' - 'a' is the sub-expression of the '+' operator
117
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
123
124 Greedy star operator (*) :
125 Matching path: match happens.
126 Backtrack path: match failed.
127 Non-greedy star operator (*?) :
128 Matching path: no need to perform a match.
129 Backtrack path: match is required.
130
The following example shows how the code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
suppose we have the following regular expression:
134
135 A(B|C)D
136
137 The generated code will be the following:
138
139 A matching path
140 '(' matching path (pushing arguments to the stack)
141 B matching path
142 ')' matching path (pushing arguments to the stack)
143 D matching path
144 return with successful match
145
146 D backtrack path
147 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
148 B backtrack path
149 C expected path
150 jump to D matching path
151 C backtrack path
152 A backtrack path
153
Notice that the order of the backtrack code paths is the opposite of the fast
code paths. In this way the topmost value on the stack always belongs
to the current backtrack code path. The backtrack path must check
whether there is a next alternative. If so, it needs to jump back to
the matching path eventually. Otherwise it needs to clear out its own stack
frame and continue the execution on the backtrack code paths.
160 */
161
162 /*
163 Saved stack frames:
164
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
169
170 The stack frames are stored in a chain list, and have the following format:
171 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
172
173 Thus we can restore the private data to a particular point in the stack.
174 */
175
/* Argument block received by functions of type jit_function (that typedef
takes a single "jit_arguments *" parameter). Pointer-sized members are grouped
first, the remaining integer members follow. */
typedef struct jit_arguments {
  /* Pointers first. */
  struct sljit_stack *stack;
  PCRE2_SPTR str;
  PCRE2_SPTR begin;
  PCRE2_SPTR end;
  pcre2_match_data *match_data;
  PCRE2_SPTR startchar_ptr;
  PCRE2_UCHAR *mark_ptr;
  /* Callout function pointer and the opaque value that accompanies it. */
  int (*callout)(pcre2_callout_block *, void *);
  void *callout_data;
  /* Everything else after. */
  sljit_uw offset_limit;
  sljit_u32 limit_match;
  sljit_u32 oveccount;
  sljit_u32 options;
} jit_arguments;
193
/* Number of slots in each per-mode array of executable_functions. */
#define JIT_NUMBER_OF_COMPILE_MODES 3

/* Per-pattern JIT record. The three arrays are indexed by compile mode and
hold, for each mode, a function pointer, a read-only data list head and a
size. top_bracket and limit_match are shared by all modes. */
typedef struct executable_functions {
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
  void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_u32 top_bracket;
  sljit_u32 limit_match;
} executable_functions;
203
/* Singly linked list node holding one sljit jump. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
208
/* Singly linked list node pairing a jump (start) with a label (quit).
NOTE(review): presumably describes an out-of-line stub entered through
"start" and left at "quit" - confirm against the code that fills it. */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
214
/* Singly linked list node pairing a label with the location of a machine
word (update_addr).
NOTE(review): presumably the word is patched with the label address once
code generation has finished - confirm against the users of this list. */
typedef struct label_addr_list {
  struct sljit_label *label;
  sljit_uw *update_addr;
  struct label_addr_list *next;
} label_addr_list;
220
/* Negative sentinel values.
NOTE(review): presumably stored in the "framesize" members of the backtrack
structures, whose comments say "less than 0 if not needed" - confirm. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
225
/* Type tags for control entries: a mark or a then-trap.
NOTE(review): presumably tags the entries of the control verb chain referred
to by compiler_common.control_head_ptr - confirm. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
230
/* Signature of a generated matcher: takes the jit_arguments block and
returns an int, using the SLJIT_FUNC calling convention attribute. */
typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
232
/* The following structure is the key data type for the recursive
code generator. It is allocated by compile_matchingpath, and contains
the arguments for compile_backtrackingpath. Must be the first member
of its descendants, so that a pointer to any of the specialised
*_backtrack structures can also be used as a backtrack_common pointer. */
typedef struct backtrack_common {
  /* Concatenation stack. */
  struct backtrack_common *prev;
  jump_list *nextbacktracks;
  /* Internal stack (for component operators). */
  struct backtrack_common *top;
  jump_list *topbacktracks;
  /* Opcode pointer. */
  PCRE2_SPTR cc;
} backtrack_common;
247
/* Backtrack record for assertions. "common" must stay the first member. */
typedef struct assert_backtrack {
  backtrack_common common;
  /* Jumps collected for the failed-condition case. */
  jump_list *condfailed;
  /* Less than 0 if a frame is not needed. */
  int framesize;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* For iterators. */
  struct sljit_label *matchingpath;
} assert_backtrack;
258
/* Backtrack record for brackets. "common" must stay the first member.
Only one member of the union "u" is meaningful at a time; which one depends
on the bracket opcode, as noted on each member. */
typedef struct bracket_backtrack {
  backtrack_common common;
  /* Where to continue if an alternative is successfully matched. */
  struct sljit_label *alternative_matchingpath;
  /* For rmin and rmax iterators. */
  struct sljit_label *recursive_matchingpath;
  /* For greedy ? operator. */
  struct sljit_label *zero_matchingpath;
  /* Contains the branches of a failed condition. */
  union {
    /* Both for OP_COND, OP_SCOND. */
    jump_list *condfailed;
    assert_backtrack *assert;
    /* For OP_ONCE. Less than 0 if not needed. */
    int framesize;
  } u;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
} bracket_backtrack;
278
/* Backtrack record for the "pos" bracket variants (NOTE(review): inferred
from the name only - confirm). "common" must stay the first member. */
typedef struct bracketpos_backtrack {
  backtrack_common common;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* Reverting stack is needed. */
  int framesize;
  /* Allocated stack size. */
  int stacksize;
} bracketpos_backtrack;
288
/* Backtrack record carrying one matching-path label in addition to the
common header (NOTE(review): presumably used for OP_BRAMINZERO - confirm).
"common" must stay the first member. */
typedef struct braminzero_backtrack {
  backtrack_common common;
  struct sljit_label *matchingpath;
} braminzero_backtrack;
293
/* Backtrack record for character iterators. "common" must stay the first
member. The union holds either a jump list or the "charpos" description
(a character, an other-case bit mask and an enabled flag); only one of the
two is meaningful at a time. */
typedef struct char_iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
  union {
    jump_list *backtracks;
    struct {
      unsigned int othercasebit;
      PCRE2_UCHAR chr;
      BOOL enabled;
    } charpos;
  } u;
} char_iterator_backtrack;
307
/* Backtrack record for back-reference iterators (NOTE(review): inferred
from the name - confirm). "common" must stay the first member. */
typedef struct ref_iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
} ref_iterator_backtrack;
313
/* One node of a singly linked list of recursion targets. Each node records
the two labels of the target and the jumps that still have to be bound to
them. */
typedef struct recurse_entry {
  struct recurse_entry *next;
  /* Contains the function entry label. */
  struct sljit_label *entry_label;
  /* Contains the function backtrack label.
  NOTE(review): the previous comment repeated the entry-label text; the
  wording here is inferred from the member name - confirm. */
  struct sljit_label *backtrack_label;
  /* Collects the entry calls until the function is created. */
  jump_list *entry_calls;
  /* Collects the backtrack calls until the function is created. */
  jump_list *backtrack_calls;
  /* Points to the starting opcode. */
  sljit_sw start;
} recurse_entry;
327
/* Backtrack record for a recursion. "common" must stay the first member. */
typedef struct recurse_backtrack {
  backtrack_common common;
  /* Return to the matching path. */
  struct sljit_label *matchingpath;
  /* Recursive pattern. */
  recurse_entry *entry;
  /* Pattern is inlined. */
  BOOL inlined_pattern;
} recurse_backtrack;
337
/* Pseudo opcode private to the JIT compiler. It takes the value
OP_TABLE_LENGTH, i.e. the first value after the real opcode table, so it
cannot collide with a real opcode. */
#define OP_THEN_TRAP OP_TABLE_LENGTH

/* Backtrack record for a then-trap. "common" must stay the first member. */
typedef struct then_trap_backtrack {
  backtrack_common common;
  /* If then_trap is not NULL, this structure contains the real
  then_trap for the backtracking path. */
  struct then_trap_backtrack *then_trap;
  /* Points to the starting opcode. */
  sljit_sw start;
  /* Exit point for the then opcodes of this alternative. */
  jump_list *quit;
  /* Frame size of the current alternative. */
  int framesize;
} then_trap_backtrack;
352
/* Limits for the fast forward character data. MAX_DIFF_CHARS is the
capacity of the "chars" array below.
NOTE(review): MAX_N_CHARS is presumably the number of positions inspected -
confirm against its users. */
#define MAX_N_CHARS 12
#define MAX_DIFF_CHARS 5

/* Set of characters that may appear at one position. */
typedef struct fast_forward_char_data {
  /* Number of characters in the chars array, 255 for any character. */
  sljit_u8 count;
  /* Number of last UTF-8 characters in the chars array. */
  sljit_u8 last_count;
  /* Available characters in the current position. */
  PCRE2_UCHAR chars[MAX_DIFF_CHARS];
} fast_forward_char_data;
364
/* Size limits for character classes.
NOTE(review): presumably the largest range count / character count that the
class optimizations handle - not visible in this part of the file, confirm. */
#define MAX_CLASS_RANGE_SIZE 4
#define MAX_CLASS_CHARS_SIZE 3
367
/* Shared state of one JIT compilation. A pointer to it (conventionally
named "common") is passed to the code generator functions; several macros
in this file (DEFINE_COMPILER, OVECTOR_START, PRIVATE_DATA, ...) expand to
expressions that read members through a variable called "common".

Members whose name ends in "_ptr" and whose type is sljit_s32 hold offsets
into the local area rather than C pointers; check_opcode_types reserves
several of them by taking the current ovector_start value and advancing
ovector_start. */
typedef struct compiler_common {
  /* The sljit generic compiler. */
  struct sljit_compiler *compiler;
  /* Compiled regular expression. */
  pcre2_real_code *re;
  /* First byte code. */
  PCRE2_SPTR start;
  /* Maps private data offset to each opcode. */
  sljit_s32 *private_data_ptrs;
  /* Chain list of read-only data ptrs. */
  void *read_only_data_head;
  /* Tells whether the capturing bracket is optimized. */
  sljit_u8 *optimized_cbracket;
  /* Tells whether the starting offset is a target of then. */
  sljit_u8 *then_offsets;
  /* Current position where a THEN must jump. */
  then_trap_backtrack *then_trap;
  /* Starting offset of private data for capturing brackets. */
  sljit_s32 cbra_ptr;
  /* Output vector starting point. Must be divisible by 2. */
  sljit_s32 ovector_start;
  /* Points to the starting character of the current match. */
  sljit_s32 start_ptr;
  /* Last known position of the requested byte. */
  sljit_s32 req_char_ptr;
  /* Head of the last recursion. */
  sljit_s32 recursive_head_ptr;
  /* First inspected character for partial matching.
  (Needed for avoiding zero length partial matches.) */
  sljit_s32 start_used_ptr;
  /* Starting pointer for partial soft matches. */
  sljit_s32 hit_start;
  /* Pointer of the match end position. */
  sljit_s32 match_end_ptr;
  /* Points to the marked string. */
  sljit_s32 mark_ptr;
  /* Recursive control verb management chain. */
  sljit_s32 control_head_ptr;
  /* Points to the last matched capture block index. */
  sljit_s32 capture_last_ptr;
  /* Fast forward skipping byte code pointer. */
  PCRE2_SPTR fast_forward_bc_ptr;
  /* Locals used by fast fail optimization. */
  sljit_s32 fast_fail_start_ptr;
  sljit_s32 fast_fail_end_ptr;

  /* Flipped and lower case tables. */
  const sljit_u8 *fcc;
  sljit_sw lcc;
  /* Mode can be PCRE2_JIT_COMPLETE and others. */
  int mode;
  /* TRUE, when minlength is greater than 0.
  NOTE(review): this wording looks inverted relative to the member name, and
  check_opcode_types sets the flag to TRUE when it meets OP_SET_SOM; confirm
  the intended meaning before relying on this comment. */
  BOOL might_be_empty;
  /* \K is found in the pattern. */
  BOOL has_set_som;
  /* (*SKIP:arg) is found in the pattern. */
  BOOL has_skip_arg;
  /* (*THEN) is found in the pattern. */
  BOOL has_then;
  /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
  BOOL has_skip_in_assert_back;
  /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
  BOOL local_quit_available;
  /* Currently in a positive assertion. */
  BOOL in_positive_assertion;
  /* Newline control. */
  int nltype;
  sljit_u32 nlmax;
  sljit_u32 nlmin;
  int newline;
  int bsr_nltype;
  sljit_u32 bsr_nlmax;
  sljit_u32 bsr_nlmin;
  /* Dollar endonly. */
  int endonly;
  /* Tables. */
  sljit_sw ctypes;
  /* Named capturing brackets. */
  PCRE2_SPTR name_table;
  sljit_sw name_count;
  sljit_sw name_entry_size;

  /* Labels and jump lists. */
  struct sljit_label *partialmatchlabel;
  struct sljit_label *quit_label;
  struct sljit_label *abort_label;
  struct sljit_label *accept_label;
  struct sljit_label *ff_newline_shortcut;
  stub_list *stubs;
  label_addr_list *label_addrs;
  recurse_entry *entries;
  recurse_entry *currententry;
  jump_list *partialmatch;
  jump_list *quit;
  jump_list *positive_assertion_quit;
  jump_list *abort;
  jump_list *failed_match;
  jump_list *accept;
  jump_list *calllimit;
  jump_list *stackalloc;
  jump_list *revertframes;
  jump_list *wordboundary;
  jump_list *anynewline;
  jump_list *hspace;
  jump_list *vspace;
  jump_list *casefulcmp;
  jump_list *caselesscmp;
  jump_list *reset_match;
  BOOL unset_backref;
  BOOL alt_circumflex;
#ifdef SUPPORT_UNICODE
  /* Members below exist only in Unicode-enabled builds. */
  BOOL utf;
  BOOL invalid_utf;
  BOOL use_ucp;
  /* Points to saving area for iref. */
  sljit_s32 iref_ptr;
  jump_list *getucd;
  jump_list *getucdtype;
#if PCRE2_CODE_UNIT_WIDTH == 8
  /* Jump lists that exist only for 8-bit code units. */
  jump_list *utfreadchar;
  jump_list *utfreadtype8;
  jump_list *utfpeakcharback;
#endif
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
  /* Jump lists that exist only for 8-bit and 16-bit code units. */
  jump_list *utfreadchar_invalid;
  jump_list *utfreadnewline_invalid;
  jump_list *utfmoveback_invalid;
  jump_list *utfpeakcharback_invalid;
#endif
#endif /* SUPPORT_UNICODE */
} compiler_common;
499
/* For byte_sequence_compare. */

/* "length" and "sourcereg" are always present. The remaining members exist
only when SLJIT_UNALIGNED is defined and non-zero: "c" and "oc" are two
identically shaped unions that let the same storage be viewed as a 32-bit
integer, a 16-bit integer, or an array of code units whose element type and
count depend on PCRE2_CODE_UNIT_WIDTH (4 x 8-bit, 2 x 16-bit or 1 x 32-bit).
NOTE(review): "oc" presumably holds the other-case bits that accompany "c" -
confirm against byte_sequence_compare. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
533
534 /* Undefine sljit macros. */
535 #undef CMP
536
/* Used for accessing the elements of the stack: converts a slot index into
a byte offset, one slot being sizeof(sljit_sw) bytes. The result has type
int, so negative indexes give negative offsets. */
#define STACK(i) ((i) * (int)sizeof(sljit_sw))
539
/* If sljit names a preferred shift register, only R2 and R3 are accepted.
When it is R3, SHIFT_REG_IS_R3 is defined and the roles of TMP2 and TMP3
below are swapped so that TMP2 maps to that register. */
#ifdef SLJIT_PREF_SHIFT_REG
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
/* Nothing. */
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
#define SHIFT_REG_IS_R3
#else
#error "Unsupported shift register"
#endif
#endif

/* Symbolic names for the sljit registers used by the generated code. */
#define TMP1 SLJIT_R0
#ifdef SHIFT_REG_IS_R3
#define TMP2 SLJIT_R3
#define TMP3 SLJIT_R2
#else
#define TMP2 SLJIT_R2
#define TMP3 SLJIT_R3
#endif
#define STR_PTR SLJIT_R1
#define STR_END SLJIT_S0
#define STACK_TOP SLJIT_S1
#define STACK_LIMIT SLJIT_S2
#define COUNT_MATCH SLJIT_S3
#define ARGUMENTS SLJIT_S4
#define RETURN_ADDR SLJIT_R4
565
/* Local space layout. The five fixed slots below occupy words 0..4; each
macro is a byte offset (a multiple of sizeof(sljit_sw)). */
/* These two locals can be used by the current opcode. */
#define LOCALS0 (0 * sizeof(sljit_sw))
#define LOCALS1 (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_sw))
#define POSSESSIVE1 (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet been
reached. */
/* The macros below need a variable named "common" (a compiler_common
pointer) in scope at the point of use. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Looks up the private data offset of the opcode at cc; the table is indexed
by the distance of cc from the first byte code. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
583
/* Code unit width dependent helpers:
  MOV_UCHAR    the sljit move opcode matching the code unit size
  UCHAR_SHIFT  log2 of the code unit size in bytes (not defined for 8-bit)
  IN_UCHARS(x) converts a count of code units into a count of bytes
Any other width is rejected at compile time. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define MOV_UCHAR SLJIT_MOV_U8
#define IN_UCHARS(x) (x)
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define MOV_UCHAR SLJIT_MOV_U16
#define UCHAR_SHIFT (1)
#define IN_UCHARS(x) ((x) * 2)
#elif PCRE2_CODE_UNIT_WIDTH == 32
#define MOV_UCHAR SLJIT_MOV_U32
#define UCHAR_SHIFT (2)
#define IN_UCHARS(x) ((x) * 4)
#else
#error Unsupported compiling mode
#endif
598
/* Shortcuts. */

/* Declares the local variable "compiler" from common->compiler. Every other
macro in this group passes that variable to the sljit emitter it wraps, so
DEFINE_COMPILER (with "common" in scope) must come first in a function that
uses them. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
/* Emits a label at the current position. */
#define LABEL() \
  sljit_emit_label(compiler)
/* Emits a jump whose target is set later. */
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Binds an existing jump to a label emitted at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
/* Compare-and-jump; the CMPTO form also binds the jump to a label. This CMP
replaces the sljit macro of the same name that was undefined earlier. */
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define CMOV(type, dst_reg, src, srcw) \
  sljit_emit_cmov(compiler, (type), (dst_reg), (src), (srcw))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
626
/* Largest value of a signed 32-bit integer.
NOTE(review): presumably passed as the "no upper bound" limit to the
character reading helpers - confirm against its users. */
#define READ_CHAR_MAX 0x7fffffff

/* Special character values.
NOTE(review): INVALID_UTF_CHAR is presumably the result reported for an
invalid sequence, and UNASSIGNED_UTF_CHAR (888) presumably a code point that
has no Unicode assignment - neither is visible here, confirm. Note that the
expansion of INVALID_UTF_CHAR is not parenthesized. */
#define INVALID_UTF_CHAR -1
#define UNASSIGNED_UTF_CHAR 888
631
632 #if defined SUPPORT_UNICODE
633 #if PCRE2_CODE_UNIT_WIDTH == 8
634
/* Validating forward read of one UTF-8 character (8-bit code units).

  c               lvalue that receives the decoded code point
  ptr             lvalue pointer to the first byte; advanced past the
                  character on success
  end             continuation bytes are only read at positions below "end"
  invalid_action  statement executed when the sequence is not valid

Accepted forms: one byte up to 0x7f; a lead byte 0xc2..0xdf, 0xe0..0xef or
0xf0..0xf4 followed by one, two or three continuation bytes in the range
0x80..0xbf. Three byte results below 0x800 or inside 0xd800..0xdfff, and four
byte results below 0x10000 or at/above 0x110000 are rejected.

Notes:
  - ptr[0] is read without being compared with "end".
  - On the rejected-result paths ptr has already been advanced by 3 or 4
    when invalid_action runs; on all other invalid paths ptr is unchanged.
  - c may be overwritten even when invalid_action runs.
  - The expansion is a brace block and the arguments are evaluated more than
    once, so they must be free of side effects. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] <= 0x7f) \
    c = *ptr++; \
  else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \
    { \
    c = ptr[1] - 0x80; \
    \
    if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \
      { \
      c |= (ptr[0] - 0xc0) << 6; \
      ptr += 2; \
      } \
    else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \
      { \
      c = c << 6 | (ptr[2] - 0x80); \
      \
      if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \
        { \
        c |= (ptr[0] - 0xe0) << 12; \
        ptr += 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \
        { \
        c = c << 6 | (ptr[3] - 0x80); \
        \
        if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \
          { \
          c |= (ptr[0] - 0xf0) << 18; \
          ptr += 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
696
/* Validating backward read of one UTF-8 character (8-bit code units).

  c               lvalue that receives the decoded code point
  ptr             lvalue pointer just AFTER the character to read; moved back
                  to the first byte of the character on success
  start           lowest position that may be read
  invalid_action  statement executed when the sequence is not valid

The caller must guarantee ptr > start, because ptr[-1] is read
unconditionally. The bytes are visited from the last one towards the lead
byte, so each earlier byte contributes the next HIGHER group of six bits:
the last byte supplies bits 0..5, the one before it bits 6..11, and so on.

Accepted forms and rejected results are the same as for GETCHARINC_INVALID.
On the rejected-result paths ptr has already been moved back by 3 or 4 when
invalid_action runs; on all other invalid paths ptr is unchanged. c may be
overwritten even when invalid_action runs. The expansion is a brace block and
the arguments are evaluated more than once, so they must be free of side
effects. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  if (ptr[-1] <= 0x7f) \
    c = *--ptr; \
  else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \
    { \
    c = ptr[-1] - 0x80; \
    \
    if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \
      { \
      c |= (ptr[-2] - 0xc0) << 6; \
      ptr -= 2; \
      } \
    else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \
      { \
      c |= (ptr[-2] - 0x80) << 6; \
      \
      if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \
        { \
        c |= (ptr[-3] - 0xe0) << 12; \
        ptr -= 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \
        { \
        c |= (ptr[-3] - 0x80) << 12; \
        \
        if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \
          { \
          c |= (ptr[-4] - 0xf0) << 18; \
          ptr -= 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
758
759 #elif PCRE2_CODE_UNIT_WIDTH == 16
760
/* Validating forward read of one UTF-16 character (16-bit code units).

  c               lvalue that receives the decoded code point
  ptr             lvalue pointer to the first code unit; advanced past the
                  character on success
  end             the second unit of a pair is only read below "end"
  invalid_action  statement executed when the sequence is not valid

A unit outside 0xd800..0xdfff is returned as it is. A high surrogate
(0xd800..0xdbff) must be followed by a low surrogate (0xdc00..0xdfff); the
pair is combined into a code point at or above 0x10000. Anything else runs
invalid_action with ptr unchanged. ptr[0] is read without being compared with
"end". The expansion is a brace block and the arguments are evaluated more
than once. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] < 0xd800 || ptr[0] >= 0xe000) \
    c = *ptr++; \
  else if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \
    { \
    c = (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)) + 0x10000; \
    ptr += 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
775
/* Validating backward read of one UTF-16 character (16-bit code units).

  c               lvalue that receives the decoded code point
  ptr             lvalue pointer just AFTER the character to read; moved back
                  to the first code unit of the character on success
  start           lowest position that may be read
  invalid_action  statement executed when the sequence is not valid

The caller must guarantee ptr > start, because ptr[-1] is read
unconditionally. A unit outside 0xd800..0xdfff is returned as it is (the
pointer is decremented BEFORE the read, so the unit in front of ptr is the
one returned). A low surrogate must be preceded by a high surrogate located
at or after "start"; the pair is combined into a code point at or above
0x10000. Anything else runs invalid_action with ptr unchanged. The expansion
is a brace block and the arguments are evaluated more than once. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  if (ptr[-1] < 0xd800 || ptr[-1] >= 0xe000) \
    c = *--ptr; \
  else if (ptr[-1] >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \
    { \
    c = (((ptr[-2] - 0xd800) << 10) | (ptr[-1] - 0xdc00)) + 0x10000; \
    ptr -= 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
790
791
792 #elif PCRE2_CODE_UNIT_WIDTH == 32
793
/* Validating forward read of one UTF-32 character (32-bit code units).

  c               lvalue that receives the code point
  ptr             lvalue pointer to the code unit; advanced by one on success
  end             not used by this variant
  invalid_action  statement executed when the unit is 0x110000 or larger

Only the upper bound is checked here; values in the surrogate range are
accepted by this macro. No backward variant is defined for this width in
this part of the file. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] < 0x110000) \
    c = *ptr++; \
  else \
    { \
    invalid_action; \
    } \
  }
803
804 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
805 #endif /* SUPPORT_UNICODE */
806
/* Finds the first opcode after a bracketed group.

Argument:
  cc   points to the opening opcode of the group; it must be one of
       OP_ASSERT..OP_ASSERTBACK_NOT or OP_ONCE..OP_SCOND (asserted)

Returns: pointer just past the closing KET of the group, i.e. past the KET
         opcode and its LINK_SIZE operand

Every alternative of the group is skipped by following the link stored
after its leading opcode until the opcode reached is no longer OP_ALT.
*/
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

for (;;)
  {
  cc += GET(cc, 1);
  if (*cc != OP_ALT)
    break;
  }

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return cc + 1 + LINK_SIZE;
}
815
/* Counts the alternatives of a bracketed group.

Argument:
  cc   points to the opening opcode of the group; it must be one of
       OP_ASSERT..OP_ASSERTBACK_NOT or OP_ONCE..OP_SCOND (asserted)

Returns: the number of alternatives, which is always at least 1

The first alternative is counted unconditionally; each OP_ALT reached by
following the links adds one more.
*/
static int no_alternatives(PCRE2_SPTR cc)
{
int alternatives;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

cc += GET(cc, 1);
for (alternatives = 1; *cc == OP_ALT; alternatives++)
  cc += GET(cc, 1);

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return alternatives;
}
829
/* Functions which might need modification for every newly supported opcode:
831 next_opcode
832 check_opcode_types
833 set_private_data_ptrs
834 get_framesize
835 init_frame
836 get_recurse_data_length
837 copy_recurse_data
838 compile_matchingpath
839 compile_backtrackingpath
840 */
841
/* Steps over one opcode of the compiled pattern.

Arguments:
  common   compiler state; only the "utf" flag is read, and only in
           Unicode-enabled builds
  cc       points to the opcode to step over

Returns:   pointer to the position that follows the opcode, or NULL when the
           opcode is OP_ANYBYTE in UTF mode or is not listed here at all

Opening brackets, OP_ALT and the KET opcodes are stepped over like any other
fixed-length opcode: the function moves to the next opcode in memory and does
not skip the contents of a group.
*/
static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
SLJIT_UNUSED_ARG(common);
switch(*cc)
  {
  /* Opcodes whose complete length is given by the OP_lengths table. */
  case OP_SOD:
  case OP_SOM:
  case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  case OP_ANY:
  case OP_ALLANY:
  case OP_NOTPROP:
  case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE:
  case OP_HSPACE:
  case OP_NOT_VSPACE:
  case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN:
  case OP_EOD:
  case OP_CIRC:
  case OP_CIRCM:
  case OP_DOLL:
  case OP_DOLLM:
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  case OP_CRPOSSTAR:
  case OP_CRPOSPLUS:
  case OP_CRPOSQUERY:
  case OP_CRPOSRANGE:
  case OP_CLASS:
  case OP_NCLASS:
  case OP_REF:
  case OP_REFI:
  case OP_DNREF:
  case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET:
  case OP_KETRMAX:
  case OP_KETRMIN:
  case OP_KETRPOS:
  case OP_REVERSE:
  case OP_ASSERT:
  case OP_ASSERT_NOT:
  case OP_ASSERTBACK:
  case OP_ASSERTBACK_NOT:
  case OP_ONCE:
  case OP_SCRIPT_RUN:
  case OP_BRA:
  case OP_BRAPOS:
  case OP_CBRA:
  case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA:
  case OP_SBRAPOS:
  case OP_SCBRA:
  case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF:
  case OP_DNCREF:
  case OP_RREF:
  case OP_DNRREF:
  case OP_FALSE:
  case OP_TRUE:
  case OP_BRAZERO:
  case OP_BRAMINZERO:
  case OP_BRAPOSZERO:
  case OP_PRUNE:
  case OP_SKIP:
  case OP_THEN:
  case OP_COMMIT:
  case OP_FAIL:
  case OP_ACCEPT:
  case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  return cc + PRIV(OP_lengths)[*cc];

  /* Opcodes that end with a literal character. The table length covers one
  code unit of the character; in UTF mode the extra code units of a
  multi-unit character are added, judged from the last unit covered by the
  table length (cc[-1] after the advance). */
  case OP_CHAR:
  case OP_CHARI:
  case OP_NOT:
  case OP_NOTI:
  case OP_STAR:
  case OP_MINSTAR:
  case OP_PLUS:
  case OP_MINPLUS:
  case OP_QUERY:
  case OP_MINQUERY:
  case OP_UPTO:
  case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSPLUS:
  case OP_POSQUERY:
  case OP_POSUPTO:
  case OP_STARI:
  case OP_MINSTARI:
  case OP_PLUSI:
  case OP_MINPLUSI:
  case OP_QUERYI:
  case OP_MINQUERYI:
  case OP_UPTOI:
  case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI:
  case OP_POSPLUSI:
  case OP_POSQUERYI:
  case OP_POSUPTOI:
  case OP_NOTSTAR:
  case OP_NOTMINSTAR:
  case OP_NOTPLUS:
  case OP_NOTMINPLUS:
  case OP_NOTQUERY:
  case OP_NOTMINQUERY:
  case OP_NOTUPTO:
  case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR:
  case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY:
  case OP_NOTPOSUPTO:
  case OP_NOTSTARI:
  case OP_NOTMINSTARI:
  case OP_NOTPLUSI:
  case OP_NOTMINPLUSI:
  case OP_NOTQUERYI:
  case OP_NOTMINQUERYI:
  case OP_NOTUPTOI:
  case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI:
  case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI:
  case OP_NOTPOSUPTOI:
  cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
  return cc;

  /* Special cases. */
  /* Type repeats: one code unit less than the table length is skipped.
  NOTE(review): presumably so that the character type opcode that ends the
  repeat is visited as an opcode of its own - confirm against the callers. */
  case OP_TYPESTAR:
  case OP_TYPEMINSTAR:
  case OP_TYPEPLUS:
  case OP_TYPEMINPLUS:
  case OP_TYPEQUERY:
  case OP_TYPEMINQUERY:
  case OP_TYPEUPTO:
  case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR:
  case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY:
  case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  /* Reported as NULL in UTF mode, otherwise a single code unit long. */
  case OP_ANYBYTE:
#ifdef SUPPORT_UNICODE
  if (common->utf) return NULL;
#endif
  return cc + 1;

  /* Variable length: the total length is stored inside the opcode, after
  two LINK_SIZE fields. */
  case OP_CALLOUT_STR:
  return cc + GET(cc, 1 + 2*LINK_SIZE);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  /* Variable length: the total length is stored right after the opcode. */
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* Verbs with an argument: cc[1] is added to a fixed part of three code
  units (NOTE(review): presumably opcode, length unit and terminating zero
  around a name of cc[1] units - confirm). */
  case OP_MARK:
  case OP_COMMIT_ARG:
  case OP_PRUNE_ARG:
  case OP_SKIP_ARG:
  case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  default:
  /* All opcodes are supported now! */
  SLJIT_UNREACHABLE();
  return NULL;
  }
}
1041
check_opcode_types(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend)1042 static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
1043 {
1044 int count;
1045 PCRE2_SPTR slot;
1046 PCRE2_SPTR assert_back_end = cc - 1;
1047
1048 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
1049 while (cc < ccend)
1050 {
1051 switch(*cc)
1052 {
1053 case OP_SET_SOM:
1054 common->has_set_som = TRUE;
1055 common->might_be_empty = TRUE;
1056 cc += 1;
1057 break;
1058
1059 case OP_REFI:
1060 #ifdef SUPPORT_UNICODE
1061 if (common->iref_ptr == 0)
1062 {
1063 common->iref_ptr = common->ovector_start;
1064 common->ovector_start += 3 * sizeof(sljit_sw);
1065 }
1066 #endif /* SUPPORT_UNICODE */
1067 /* Fall through. */
1068 case OP_REF:
1069 common->optimized_cbracket[GET2(cc, 1)] = 0;
1070 cc += 1 + IMM2_SIZE;
1071 break;
1072
1073 case OP_CBRAPOS:
1074 case OP_SCBRAPOS:
1075 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
1076 cc += 1 + LINK_SIZE + IMM2_SIZE;
1077 break;
1078
1079 case OP_COND:
1080 case OP_SCOND:
1081 /* Only AUTO_CALLOUT can insert this opcode. We do
1082 not intend to support this case. */
1083 if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
1084 return FALSE;
1085 cc += 1 + LINK_SIZE;
1086 break;
1087
1088 case OP_CREF:
1089 common->optimized_cbracket[GET2(cc, 1)] = 0;
1090 cc += 1 + IMM2_SIZE;
1091 break;
1092
1093 case OP_DNREF:
1094 case OP_DNREFI:
1095 case OP_DNCREF:
1096 count = GET2(cc, 1 + IMM2_SIZE);
1097 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
1098 while (count-- > 0)
1099 {
1100 common->optimized_cbracket[GET2(slot, 0)] = 0;
1101 slot += common->name_entry_size;
1102 }
1103 cc += 1 + 2 * IMM2_SIZE;
1104 break;
1105
1106 case OP_RECURSE:
1107 /* Set its value only once. */
1108 if (common->recursive_head_ptr == 0)
1109 {
1110 common->recursive_head_ptr = common->ovector_start;
1111 common->ovector_start += sizeof(sljit_sw);
1112 }
1113 cc += 1 + LINK_SIZE;
1114 break;
1115
1116 case OP_CALLOUT:
1117 case OP_CALLOUT_STR:
1118 if (common->capture_last_ptr == 0)
1119 {
1120 common->capture_last_ptr = common->ovector_start;
1121 common->ovector_start += sizeof(sljit_sw);
1122 }
1123 cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
1124 break;
1125
1126 case OP_ASSERTBACK:
1127 slot = bracketend(cc);
1128 if (slot > assert_back_end)
1129 assert_back_end = slot;
1130 cc += 1 + LINK_SIZE;
1131 break;
1132
1133 case OP_THEN_ARG:
1134 common->has_then = TRUE;
1135 common->control_head_ptr = 1;
1136 /* Fall through. */
1137
1138 case OP_COMMIT_ARG:
1139 case OP_PRUNE_ARG:
1140 case OP_MARK:
1141 if (common->mark_ptr == 0)
1142 {
1143 common->mark_ptr = common->ovector_start;
1144 common->ovector_start += sizeof(sljit_sw);
1145 }
1146 cc += 1 + 2 + cc[1];
1147 break;
1148
1149 case OP_THEN:
1150 common->has_then = TRUE;
1151 common->control_head_ptr = 1;
1152 cc += 1;
1153 break;
1154
1155 case OP_SKIP:
1156 if (cc < assert_back_end)
1157 common->has_skip_in_assert_back = TRUE;
1158 cc += 1;
1159 break;
1160
1161 case OP_SKIP_ARG:
1162 common->control_head_ptr = 1;
1163 common->has_skip_arg = TRUE;
1164 if (cc < assert_back_end)
1165 common->has_skip_in_assert_back = TRUE;
1166 cc += 1 + 2 + cc[1];
1167 break;
1168
1169 default:
1170 cc = next_opcode(common, cc);
1171 if (cc == NULL)
1172 return FALSE;
1173 break;
1174 }
1175 }
1176 return TRUE;
1177 }
1178
is_accelerated_repeat(PCRE2_SPTR cc)1179 static BOOL is_accelerated_repeat(PCRE2_SPTR cc)
1180 {
1181 switch(*cc)
1182 {
1183 case OP_TYPESTAR:
1184 case OP_TYPEMINSTAR:
1185 case OP_TYPEPLUS:
1186 case OP_TYPEMINPLUS:
1187 case OP_TYPEPOSSTAR:
1188 case OP_TYPEPOSPLUS:
1189 return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);
1190
1191 case OP_STAR:
1192 case OP_MINSTAR:
1193 case OP_PLUS:
1194 case OP_MINPLUS:
1195 case OP_POSSTAR:
1196 case OP_POSPLUS:
1197
1198 case OP_STARI:
1199 case OP_MINSTARI:
1200 case OP_PLUSI:
1201 case OP_MINPLUSI:
1202 case OP_POSSTARI:
1203 case OP_POSPLUSI:
1204
1205 case OP_NOTSTAR:
1206 case OP_NOTMINSTAR:
1207 case OP_NOTPLUS:
1208 case OP_NOTMINPLUS:
1209 case OP_NOTPOSSTAR:
1210 case OP_NOTPOSPLUS:
1211
1212 case OP_NOTSTARI:
1213 case OP_NOTMINSTARI:
1214 case OP_NOTPLUSI:
1215 case OP_NOTMINPLUSI:
1216 case OP_NOTPOSSTARI:
1217 case OP_NOTPOSPLUSI:
1218 return TRUE;
1219
1220 case OP_CLASS:
1221 case OP_NCLASS:
1222 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
1223 case OP_XCLASS:
1224 cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(PCRE2_UCHAR)));
1225 #else
1226 cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
1227 #endif
1228
1229 switch(*cc)
1230 {
1231 case OP_CRSTAR:
1232 case OP_CRMINSTAR:
1233 case OP_CRPLUS:
1234 case OP_CRMINPLUS:
1235 case OP_CRPOSSTAR:
1236 case OP_CRPOSPLUS:
1237 return TRUE;
1238 }
1239 break;
1240 }
1241 return FALSE;
1242 }
1243
detect_fast_forward_skip(compiler_common * common,int * private_data_start)1244 static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
1245 {
1246 PCRE2_SPTR cc = common->start;
1247 PCRE2_SPTR end;
1248
1249 /* Skip not repeated brackets. */
1250 while (TRUE)
1251 {
1252 switch(*cc)
1253 {
1254 case OP_SOD:
1255 case OP_SOM:
1256 case OP_SET_SOM:
1257 case OP_NOT_WORD_BOUNDARY:
1258 case OP_WORD_BOUNDARY:
1259 case OP_EODN:
1260 case OP_EOD:
1261 case OP_CIRC:
1262 case OP_CIRCM:
1263 case OP_DOLL:
1264 case OP_DOLLM:
1265 /* Zero width assertions. */
1266 cc++;
1267 continue;
1268 }
1269
1270 if (*cc != OP_BRA && *cc != OP_CBRA)
1271 break;
1272
1273 end = cc + GET(cc, 1);
1274 if (*end != OP_KET || PRIVATE_DATA(end) != 0)
1275 return FALSE;
1276 if (*cc == OP_CBRA)
1277 {
1278 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1279 return FALSE;
1280 cc += IMM2_SIZE;
1281 }
1282 cc += 1 + LINK_SIZE;
1283 }
1284
1285 if (is_accelerated_repeat(cc))
1286 {
1287 common->fast_forward_bc_ptr = cc;
1288 common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1289 *private_data_start += sizeof(sljit_sw);
1290 return TRUE;
1291 }
1292 return FALSE;
1293 }
1294
detect_fast_fail(compiler_common * common,PCRE2_SPTR cc,int * private_data_start,sljit_s32 depth)1295 static SLJIT_INLINE void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth)
1296 {
1297 PCRE2_SPTR next_alt;
1298
1299 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
1300
1301 if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1302 return;
1303
1304 next_alt = bracketend(cc) - (1 + LINK_SIZE);
1305 if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
1306 return;
1307
1308 do
1309 {
1310 next_alt = cc + GET(cc, 1);
1311
1312 cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1313
1314 while (TRUE)
1315 {
1316 switch(*cc)
1317 {
1318 case OP_SOD:
1319 case OP_SOM:
1320 case OP_SET_SOM:
1321 case OP_NOT_WORD_BOUNDARY:
1322 case OP_WORD_BOUNDARY:
1323 case OP_EODN:
1324 case OP_EOD:
1325 case OP_CIRC:
1326 case OP_CIRCM:
1327 case OP_DOLL:
1328 case OP_DOLLM:
1329 /* Zero width assertions. */
1330 cc++;
1331 continue;
1332 }
1333 break;
1334 }
1335
1336 if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
1337 detect_fast_fail(common, cc, private_data_start, depth - 1);
1338
1339 if (is_accelerated_repeat(cc))
1340 {
1341 common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1342
1343 if (common->fast_fail_start_ptr == 0)
1344 common->fast_fail_start_ptr = *private_data_start;
1345
1346 *private_data_start += sizeof(sljit_sw);
1347 common->fast_fail_end_ptr = *private_data_start;
1348
1349 if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1350 return;
1351 }
1352
1353 cc = next_alt;
1354 }
1355 while (*cc == OP_ALT);
1356 }
1357
/* Returns the number of private data words (0, 1 or 2) used for the class
repeat opcode at cc. cc must point to the opcode which follows the class
data; any opcode which is not handled below yields 0. */
static int get_class_iterator_size(PCRE2_SPTR cc)
{
sljit_u32 lower;
sljit_u32 upper;
sljit_u32 extra;

switch(*cc)
  {
  case OP_CRSTAR:
  case OP_CRPLUS:
  return 2;

  case OP_CRMINSTAR:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  return 1;

  case OP_CRRANGE:
  case OP_CRMINRANGE:
  lower = GET2(cc, 1);
  upper = GET2(cc, 1 + IMM2_SIZE);

  /* A zero maximum is sized like the star iterators above. */
  if (upper == 0)
    {
    if (*cc == OP_CRRANGE)
      return 2;
    return 1;
    }

  /* Otherwise the size is the number of optional iterations, at most 2. */
  extra = upper - lower;
  return (extra > 2) ? 2 : extra;

  default:
  return 0;
  }
}
1389
detect_repeat(compiler_common * common,PCRE2_SPTR begin)1390 static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
1391 {
1392 PCRE2_SPTR end = bracketend(begin);
1393 PCRE2_SPTR next;
1394 PCRE2_SPTR next_end;
1395 PCRE2_SPTR max_end;
1396 PCRE2_UCHAR type;
1397 sljit_sw length = end - begin;
1398 sljit_s32 min, max, i;
1399
1400 /* Detect fixed iterations first. */
1401 if (end[-(1 + LINK_SIZE)] != OP_KET)
1402 return FALSE;
1403
1404 /* Already detected repeat. */
1405 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
1406 return TRUE;
1407
1408 next = end;
1409 min = 1;
1410 while (1)
1411 {
1412 if (*next != *begin)
1413 break;
1414 next_end = bracketend(next);
1415 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1416 break;
1417 next = next_end;
1418 min++;
1419 }
1420
1421 if (min == 2)
1422 return FALSE;
1423
1424 max = 0;
1425 max_end = next;
1426 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1427 {
1428 type = *next;
1429 while (1)
1430 {
1431 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1432 break;
1433 next_end = bracketend(next + 2 + LINK_SIZE);
1434 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1435 break;
1436 next = next_end;
1437 max++;
1438 }
1439
1440 if (next[0] == type && next[1] == *begin && max >= 1)
1441 {
1442 next_end = bracketend(next + 1);
1443 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1444 {
1445 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1446 if (*next_end != OP_KET)
1447 break;
1448
1449 if (i == max)
1450 {
1451 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1452 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1453 /* +2 the original and the last. */
1454 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1455 if (min == 1)
1456 return TRUE;
1457 min--;
1458 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1459 }
1460 }
1461 }
1462 }
1463
1464 if (min >= 3)
1465 {
1466 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1467 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1468 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1469 return TRUE;
1470 }
1471
1472 return FALSE;
1473 }
1474
/* Case label groups shared by the switch statements which process iterator
opcodes (set_private_data_ptrs and get_recurse_data_length in this part of
the file). Each macro expands to a list of "case" labels and must be followed
by the statements of that case. The numeric suffix is the number of private
data words which set_private_data_ptrs requests for the group. */

/* Character iterators using one private data word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

/* Character iterators using two private data words. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

/* Character iterators using two private data words, whose opcode is followed
by an extra IMM2_SIZE operand before the character. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* Type iterators using one private data word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Type iterators using two private data words. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* Type iterators using two private data words, whose opcode is followed by
an extra IMM2_SIZE operand. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1526
/* Assigns private data offsets to the opcodes of the pattern, from
common->start up to ccend.

Offsets are handed out starting at *private_data_start and are stored in
common->private_data_ptrs at the index of the opcode. The next free offset is
written back to *private_data_start on return. The scan stops early when the
next free offset is greater than SLJIT_MAX_LOCAL_SIZE.

Brackets which always need a word get one. OP_ONCE, OP_BRA, OP_CBRA and
OP_COND groups are first passed to detect_repeat(), unless they directly
follow an OP_BRAZERO / OP_BRAMINZERO / OP_BRAPOSZERO. Character, type and
class iterators get one or two words, but only when they are not inside a
repeated bracket (see the "end" variable). */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
PCRE2_SPTR cc = common->start;
PCRE2_SPTR alternative;
/* While cc is below "end", cc is inside a repeated bracket and iterators
do not get a private slot. NULL means there is no such bracket. */
PCRE2_SPTR end = NULL;
int private_data_ptr = *private_data_start;
/* space: number of words requested by an iterator.
size: length of the current opcode; a negative value means that a
  character follows the opcode (so UTF extra code units are skipped too).
bracketlen: length of a bracket header. */
int space, size, bracketlen;
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  space = 0;
  size = 0;
  bracketlen = 0;
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
    {
    if (detect_repeat(common, cc))
      {
      /* These brackets are converted to repeats, so no global
      based single character repeat is allowed. */
      if (cc >= end)
        end = bracketend(cc);
      }
    }
  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry after the OP_KET is a record written by
    detect_repeat(): the OP_KET gets a word and the copies are skipped. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    /* The bracket which follows must not be checked for repeats. */
    repeat_check = FALSE;
    size = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    space = 1;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    space = 2;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    space = 2;
    size = -(2 + IMM2_SIZE);
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    space = 1;
    size = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    size = 1;
    break;

    case OP_TYPEUPTO:
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    size = 1 + IMM2_SIZE;
    break;

    case OP_TYPEMINUPTO:
    space = 2;
    size = 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* The repeat opcode, if any, follows the class data, so the length of
    the class must be known before get_class_iterator_size() is called.
    Calling it with size == 0 would inspect the class opcode itself and
    always yield 0. */
    size = 1 + 32 / sizeof(PCRE2_UCHAR);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    /* Same as above: the length is needed first. */
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
  gets a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  if (size != 0)
    {
    if (size < 0)
      {
      cc += -size;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    if (cc >= end)
      {
      /* A bracket which does not end with a plain OP_KET is repeated:
      remember its end. Otherwise there is no restricted region. */
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
*private_data_start = private_data_ptr;
}
1707
1708 /* Returns with a frame_types (always < 0) if no need for frame. */
get_framesize(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,BOOL recursive,BOOL * needs_control_head)1709 static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
1710 {
1711 int length = 0;
1712 int possessive = 0;
1713 BOOL stack_restore = FALSE;
1714 BOOL setsom_found = recursive;
1715 BOOL setmark_found = recursive;
1716 /* The last capture is a local variable even for recursions. */
1717 BOOL capture_last_found = FALSE;
1718
1719 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1720 SLJIT_ASSERT(common->control_head_ptr != 0);
1721 *needs_control_head = TRUE;
1722 #else
1723 *needs_control_head = FALSE;
1724 #endif
1725
1726 if (ccend == NULL)
1727 {
1728 ccend = bracketend(cc) - (1 + LINK_SIZE);
1729 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1730 {
1731 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1732 /* This is correct regardless of common->capture_last_ptr. */
1733 capture_last_found = TRUE;
1734 }
1735 cc = next_opcode(common, cc);
1736 }
1737
1738 SLJIT_ASSERT(cc != NULL);
1739 while (cc < ccend)
1740 switch(*cc)
1741 {
1742 case OP_SET_SOM:
1743 SLJIT_ASSERT(common->has_set_som);
1744 stack_restore = TRUE;
1745 if (!setsom_found)
1746 {
1747 length += 2;
1748 setsom_found = TRUE;
1749 }
1750 cc += 1;
1751 break;
1752
1753 case OP_MARK:
1754 case OP_COMMIT_ARG:
1755 case OP_PRUNE_ARG:
1756 case OP_THEN_ARG:
1757 SLJIT_ASSERT(common->mark_ptr != 0);
1758 stack_restore = TRUE;
1759 if (!setmark_found)
1760 {
1761 length += 2;
1762 setmark_found = TRUE;
1763 }
1764 if (common->control_head_ptr != 0)
1765 *needs_control_head = TRUE;
1766 cc += 1 + 2 + cc[1];
1767 break;
1768
1769 case OP_RECURSE:
1770 stack_restore = TRUE;
1771 if (common->has_set_som && !setsom_found)
1772 {
1773 length += 2;
1774 setsom_found = TRUE;
1775 }
1776 if (common->mark_ptr != 0 && !setmark_found)
1777 {
1778 length += 2;
1779 setmark_found = TRUE;
1780 }
1781 if (common->capture_last_ptr != 0 && !capture_last_found)
1782 {
1783 length += 2;
1784 capture_last_found = TRUE;
1785 }
1786 cc += 1 + LINK_SIZE;
1787 break;
1788
1789 case OP_CBRA:
1790 case OP_CBRAPOS:
1791 case OP_SCBRA:
1792 case OP_SCBRAPOS:
1793 stack_restore = TRUE;
1794 if (common->capture_last_ptr != 0 && !capture_last_found)
1795 {
1796 length += 2;
1797 capture_last_found = TRUE;
1798 }
1799 length += 3;
1800 cc += 1 + LINK_SIZE + IMM2_SIZE;
1801 break;
1802
1803 case OP_THEN:
1804 stack_restore = TRUE;
1805 if (common->control_head_ptr != 0)
1806 *needs_control_head = TRUE;
1807 cc ++;
1808 break;
1809
1810 default:
1811 stack_restore = TRUE;
1812 /* Fall through. */
1813
1814 case OP_NOT_WORD_BOUNDARY:
1815 case OP_WORD_BOUNDARY:
1816 case OP_NOT_DIGIT:
1817 case OP_DIGIT:
1818 case OP_NOT_WHITESPACE:
1819 case OP_WHITESPACE:
1820 case OP_NOT_WORDCHAR:
1821 case OP_WORDCHAR:
1822 case OP_ANY:
1823 case OP_ALLANY:
1824 case OP_ANYBYTE:
1825 case OP_NOTPROP:
1826 case OP_PROP:
1827 case OP_ANYNL:
1828 case OP_NOT_HSPACE:
1829 case OP_HSPACE:
1830 case OP_NOT_VSPACE:
1831 case OP_VSPACE:
1832 case OP_EXTUNI:
1833 case OP_EODN:
1834 case OP_EOD:
1835 case OP_CIRC:
1836 case OP_CIRCM:
1837 case OP_DOLL:
1838 case OP_DOLLM:
1839 case OP_CHAR:
1840 case OP_CHARI:
1841 case OP_NOT:
1842 case OP_NOTI:
1843
1844 case OP_EXACT:
1845 case OP_POSSTAR:
1846 case OP_POSPLUS:
1847 case OP_POSQUERY:
1848 case OP_POSUPTO:
1849
1850 case OP_EXACTI:
1851 case OP_POSSTARI:
1852 case OP_POSPLUSI:
1853 case OP_POSQUERYI:
1854 case OP_POSUPTOI:
1855
1856 case OP_NOTEXACT:
1857 case OP_NOTPOSSTAR:
1858 case OP_NOTPOSPLUS:
1859 case OP_NOTPOSQUERY:
1860 case OP_NOTPOSUPTO:
1861
1862 case OP_NOTEXACTI:
1863 case OP_NOTPOSSTARI:
1864 case OP_NOTPOSPLUSI:
1865 case OP_NOTPOSQUERYI:
1866 case OP_NOTPOSUPTOI:
1867
1868 case OP_TYPEEXACT:
1869 case OP_TYPEPOSSTAR:
1870 case OP_TYPEPOSPLUS:
1871 case OP_TYPEPOSQUERY:
1872 case OP_TYPEPOSUPTO:
1873
1874 case OP_CLASS:
1875 case OP_NCLASS:
1876 case OP_XCLASS:
1877
1878 case OP_CALLOUT:
1879 case OP_CALLOUT_STR:
1880
1881 cc = next_opcode(common, cc);
1882 SLJIT_ASSERT(cc != NULL);
1883 break;
1884 }
1885
1886 /* Possessive quantifiers can use a special case. */
1887 if (SLJIT_UNLIKELY(possessive == length))
1888 return stack_restore ? no_frame : no_stack;
1889
1890 if (length > 0)
1891 return length + 1;
1892 return stack_restore ? no_frame : no_stack;
1893 }
1894
init_frame(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,int stackpos,int stacktop)1895 static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
1896 {
1897 DEFINE_COMPILER;
1898 BOOL setsom_found = FALSE;
1899 BOOL setmark_found = FALSE;
1900 /* The last capture is a local variable even for recursions. */
1901 BOOL capture_last_found = FALSE;
1902 int offset;
1903
1904 /* >= 1 + shortest item size (2) */
1905 SLJIT_UNUSED_ARG(stacktop);
1906 SLJIT_ASSERT(stackpos >= stacktop + 2);
1907
1908 stackpos = STACK(stackpos);
1909 if (ccend == NULL)
1910 {
1911 ccend = bracketend(cc) - (1 + LINK_SIZE);
1912 if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
1913 cc = next_opcode(common, cc);
1914 }
1915
1916 SLJIT_ASSERT(cc != NULL);
1917 while (cc < ccend)
1918 switch(*cc)
1919 {
1920 case OP_SET_SOM:
1921 SLJIT_ASSERT(common->has_set_som);
1922 if (!setsom_found)
1923 {
1924 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1925 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1926 stackpos -= (int)sizeof(sljit_sw);
1927 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1928 stackpos -= (int)sizeof(sljit_sw);
1929 setsom_found = TRUE;
1930 }
1931 cc += 1;
1932 break;
1933
1934 case OP_MARK:
1935 case OP_COMMIT_ARG:
1936 case OP_PRUNE_ARG:
1937 case OP_THEN_ARG:
1938 SLJIT_ASSERT(common->mark_ptr != 0);
1939 if (!setmark_found)
1940 {
1941 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1942 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1943 stackpos -= (int)sizeof(sljit_sw);
1944 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1945 stackpos -= (int)sizeof(sljit_sw);
1946 setmark_found = TRUE;
1947 }
1948 cc += 1 + 2 + cc[1];
1949 break;
1950
1951 case OP_RECURSE:
1952 if (common->has_set_som && !setsom_found)
1953 {
1954 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1955 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1956 stackpos -= (int)sizeof(sljit_sw);
1957 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1958 stackpos -= (int)sizeof(sljit_sw);
1959 setsom_found = TRUE;
1960 }
1961 if (common->mark_ptr != 0 && !setmark_found)
1962 {
1963 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1964 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1965 stackpos -= (int)sizeof(sljit_sw);
1966 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1967 stackpos -= (int)sizeof(sljit_sw);
1968 setmark_found = TRUE;
1969 }
1970 if (common->capture_last_ptr != 0 && !capture_last_found)
1971 {
1972 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1973 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1974 stackpos -= (int)sizeof(sljit_sw);
1975 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1976 stackpos -= (int)sizeof(sljit_sw);
1977 capture_last_found = TRUE;
1978 }
1979 cc += 1 + LINK_SIZE;
1980 break;
1981
1982 case OP_CBRA:
1983 case OP_CBRAPOS:
1984 case OP_SCBRA:
1985 case OP_SCBRAPOS:
1986 if (common->capture_last_ptr != 0 && !capture_last_found)
1987 {
1988 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1989 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1990 stackpos -= (int)sizeof(sljit_sw);
1991 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1992 stackpos -= (int)sizeof(sljit_sw);
1993 capture_last_found = TRUE;
1994 }
1995 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1996 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1997 stackpos -= (int)sizeof(sljit_sw);
1998 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1999 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
2000 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2001 stackpos -= (int)sizeof(sljit_sw);
2002 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
2003 stackpos -= (int)sizeof(sljit_sw);
2004
2005 cc += 1 + LINK_SIZE + IMM2_SIZE;
2006 break;
2007
2008 default:
2009 cc = next_opcode(common, cc);
2010 SLJIT_ASSERT(cc != NULL);
2011 break;
2012 }
2013
2014 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
2015 SLJIT_ASSERT(stackpos == STACK(stacktop));
2016 }
2017
/* Number of temporary registers rotated by the delayed memory copy helpers
below. */
#define RECURSE_TMP_REG_COUNT 3

/* State of a sequence of delayed memory-to-memory copies. Each copy loads
its value into the next temporary register of the rotation; the store of that
value is emitted only when the register is needed again, or when the sequence
is finished. tmp_regs and saved_tmp_regs must be filled in before
delayed_mem_copy_init() is called (it asserts on their contents). */
typedef struct delayed_mem_copy_status {
  /* Compiler used for emitting the instructions. */
  struct sljit_compiler *compiler;
  /* Base register of the pending store of each slot, -1 when none. */
  int store_bases[RECURSE_TMP_REG_COUNT];
  /* Offset of the pending store of each slot. */
  int store_offsets[RECURSE_TMP_REG_COUNT];
  /* Register which carries the value of each slot. */
  int tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Register in which the original content of tmp_regs[i] is kept while
  sljit_get_register_index() reports a negative index for it. */
  int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Slot used by the next copy. */
  int next_tmp_reg;
} delayed_mem_copy_status;
2028
/* Prepares status for a new sequence of delayed copies: takes the compiler
from common, restarts the rotation at slot 0 and marks every slot as having
no pending store. The tmp_regs and saved_tmp_regs arrays must have been set
by the caller; they are only checked here. */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
int slot;

status->compiler = common->compiler;
status->next_tmp_reg = 0;

for (slot = 0; slot < RECURSE_TMP_REG_COUNT; slot++)
  {
  SLJIT_ASSERT(status->tmp_regs[slot] >= 0);
  /* A separate save register is accepted only when its register index
  is negative. */
  SLJIT_ASSERT(sljit_get_register_index(status->saved_tmp_regs[slot]) < 0 || status->tmp_regs[slot] == status->saved_tmp_regs[slot]);

  status->store_bases[slot] = -1;
  }
}
2043
/* Queues the copy of one word from [load_base + load_offset] to
[store_base + store_offset].

The load is emitted immediately into the next temporary register of the
rotation. Before that, the register is freed: if it still holds the value of
an earlier copy, the store of that copy is emitted; if it is used for the
first time, its content is preserved in the matching save register when
sljit_get_register_index() reports a negative index for that register. */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_sw store_offset)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int reg = status->tmp_regs[slot];
int pending_base = status->store_bases[slot];

SLJIT_ASSERT(load_base > 0 && store_base > 0);

if (pending_base != -1)
  {
  /* Flush the earlier copy which occupies this register. */
  OP1(SLJIT_MOV, SLJIT_MEM1(pending_base), status->store_offsets[slot], reg, 0);
  }
else if (sljit_get_register_index(status->saved_tmp_regs[slot]) < 0)
  {
  /* Preserve virtual registers. */
  OP1(SLJIT_MOV, status->saved_tmp_regs[slot], 0, reg, 0);
  }

OP1(SLJIT_MOV, reg, 0, SLJIT_MEM1(load_base), load_offset);

/* The store is emitted later. */
status->store_bases[slot] = store_base;
status->store_offsets[slot] = store_offset;

status->next_tmp_reg = (slot + 1) % RECURSE_TMP_REG_COUNT;
}
2068
/* Emits the stores which are still pending in status. The slots are visited
once each in rotation order, starting with the slot the next copy would have
used. After the store of a slot is emitted, the original content of its
temporary register is moved back from the save register when
sljit_get_register_index() reports a negative index for the save register. */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int visited;
int reg, saved_reg;

for (visited = 0; visited < RECURSE_TMP_REG_COUNT; visited++, slot = (slot + 1) % RECURSE_TMP_REG_COUNT)
  {
  /* Nothing is pending in this slot. */
  if (status->store_bases[slot] == -1)
    continue;

  reg = status->tmp_regs[slot];
  saved_reg = status->saved_tmp_regs[slot];

  OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], reg, 0);

  /* Restore virtual registers. */
  if (sljit_get_register_index(saved_reg) < 0)
    OP1(SLJIT_MOV, reg, 0, saved_reg, 0);
  }
}
2092
2093 #undef RECURSE_TMP_REG_COUNT
2094
/* Counts the machine words of private data which belong to the code between
cc and ccend; cc must reach ccend exactly.

The count starts at one. A word is added for every opcode in the range which
has private data assigned, capturing brackets add the words counted below,
and one word each is added at the end for the control head and the last
capture when they are used. The start of match and mark values add a word
each only when a verb which can quit the recursion is present.

Outputs:
  *needs_control_head  TRUE when the control head is used
  *has_quit            TRUE when a quitting verb was found (every verb
                       handled below except OP_MARK, OP_ACCEPT and
                       OP_ASSERT_ACCEPT)
  *has_accept          TRUE when OP_ACCEPT or OP_ASSERT_ACCEPT was found

Returns the number of words. */
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
  BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept)
{
int words = 1;
int class_len;
PCRE2_SPTR alt;
BOOL saw_quit = FALSE;
BOOL saw_accept = FALSE;
BOOL uses_som = FALSE;
BOOL uses_mark = FALSE;
BOOL uses_capture_last = FALSE;
BOOL uses_control_head = FALSE;

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
uses_control_head = TRUE;
#endif

while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    uses_som = TRUE;
    cc++;
    break;

    case OP_RECURSE:
    /* A nested recursion may touch every value the pattern uses. */
    if (common->has_set_som)
      uses_som = TRUE;
    if (common->mark_ptr != 0)
      uses_mark = TRUE;
    if (common->capture_last_ptr != 0)
      uses_capture_last = TRUE;
    cc += 1 + LINK_SIZE;
    break;

    case OP_KET:
    /* A closing bracket with private data is a detected repeat: one word
    is counted and the repeated copies are skipped. */
    if (PRIVATE_DATA(cc) != 0)
      {
      words++;
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    words++;
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    /* Two words, plus one more when the bracket is not optimized. */
    words += 2;
    if (common->capture_last_ptr != 0)
      uses_capture_last = TRUE;
    if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
      words++;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    words += 4;
    if (common->capture_last_ptr != 0)
      uses_capture_last = TRUE;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alt = cc + GET(cc, 1);
    if (*alt == OP_KETRMAX || *alt == OP_KETRMIN)
      words++;
    cc += 1 + LINK_SIZE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc) != 0)
      words++;
    cc += 2;
#ifdef SUPPORT_UNICODE
    /* Skip the remaining code units of a multi-unit character. */
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc) != 0)
      words++;
    cc++;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc++;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#endif
    /* Fixed length, except for OP_XCLASS which stores its own. */
    class_len = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    if (*cc == OP_XCLASS)
      class_len = GET(cc, 1);
#endif
    /* The repeat opcode follows the class data. */
    if (PRIVATE_DATA(cc) != 0)
      words += get_class_iterator_size(cc + class_len);
    cc += class_len;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    uses_mark = TRUE;
    if (common->control_head_ptr != 0)
      uses_control_head = TRUE;
    if (*cc != OP_MARK)
      saw_quit = TRUE;

    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_SKIP:
    case OP_COMMIT:
    saw_quit = TRUE;
    cc++;
    break;

    case OP_SKIP_ARG:
    saw_quit = TRUE;
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    saw_quit = TRUE;
    uses_control_head = TRUE;
    cc++;
    break;

    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    saw_accept = TRUE;
    cc++;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }
SLJIT_ASSERT(cc == ccend);

if (uses_control_head)
  words++;
if (uses_capture_last)
  words++;
/* These two are counted only if the recursion can be quit by a verb. */
if (saw_quit && uses_som)
  words++;
if (saw_quit && uses_mark)
  words++;

*needs_control_head = uses_control_head;
*has_quit = saw_quit;
*has_accept = saw_accept;
return words;
}
2310
/* Transfer modes accepted by copy_recurse_data() through its "type" argument.
The mode decides which of the three value groups collected by that function
(private, shared, kept shared) take part in the transfer. */
enum copy_recurse_data_types {
  /* All three groups are processed, using the "from_sp" direction. */
  recurse_copy_from_global = 0,
  /* Only the private group is processed, in the opposite direction. */
  recurse_copy_private_to_global = 1,
  /* The shared and kept shared groups are processed. */
  recurse_copy_shared_to_global = 2,
  /* Only the kept shared group is processed. */
  recurse_copy_kept_shared_to_global = 3,
  /* The private and shared groups are exchanged (moved in both directions). */
  recurse_swap_global = 4
};
2318
/* Generates the memory moves which save, restore or exchange the state that
belongs to the recursion body in [cc, ccend).

The opcode stream is scanned once. For each opcode up to two "private", three
"shared" and two "kept shared" SLJIT_SP relative offsets are collected, and
every collected offset owns one sljit_sw sized slot of a contiguous block
addressed through base_reg. Slots are assigned in scan order; for a single
opcode the private values come first, then the shared ones, then the kept
shared ones. The first slot of the block always belongs to
common->recursive_head_ptr.

Arguments:
  common     compiler state
  cc         first opcode of the scanned range
  ccend      end of the scanned range (exclusive)
  type       one of enum copy_recurse_data_types; selects the groups that are
             moved and the direction of the moves, the slots of the other
             groups are only skipped
  stackptr   index of the first slot, converted by STACK()
  stacktop   index just past the last slot, converted by STACK(); only used
             by the final assertion
  has_quit   when FALSE no "kept shared" value (OVECTOR(0), mark_ptr) is
             collected

NOTE(review): the operand order of delayed_mem_copy_move() is not visible in
this part of the file; the calls below look like (status, load base, load
offset, store base, store offset) - confirm against its definition. */
static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
  int type, int stackptr, int stacktop, BOOL has_quit)
{
delayed_mem_copy_status status;
PCRE2_SPTR alternative;
sljit_sw private_srcw[2];
sljit_sw shared_srcw[3];
sljit_sw kept_shared_srcw[2];
int private_count, shared_count, kept_shared_count;
int from_sp, base_reg, offset, i;
/* Each of these values gets a slot only once, at its first occurrence. */
BOOL setsom_found = FALSE;
BOOL setmark_found = FALSE;
BOOL capture_last_found = FALSE;
BOOL control_head_found = FALSE;

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
/* In this debug configuration the control head has a fixed slot (handled
before the scan), so it must not be collected again by the scan. */
SLJIT_ASSERT(common->control_head_ptr != 0);
control_head_found = TRUE;
#endif

/* Select the direction flag and the register which addresses the slots. */
switch (type)
  {
  case recurse_copy_from_global:
  from_sp = TRUE;
  base_reg = STACK_TOP;
  break;

  case recurse_copy_private_to_global:
  case recurse_copy_shared_to_global:
  case recurse_copy_kept_shared_to_global:
  from_sp = FALSE;
  base_reg = STACK_TOP;
  break;

  default:
  SLJIT_ASSERT(type == recurse_swap_global);
  from_sp = FALSE;
  base_reg = TMP2;
  break;
  }

stackptr = STACK(stackptr);
stacktop = STACK(stacktop);

/* Choose the three registers handed to the delayed copy helper. TMP2 cannot
be used when it is the base register; STR_PTR / STR_END are substituted when
sljit_get_register_index() reports -1 for RETURN_ADDR / TMP3. */
status.tmp_regs[0] = TMP1;
status.saved_tmp_regs[0] = TMP1;

if (base_reg != TMP2)
  {
  status.tmp_regs[1] = TMP2;
  status.saved_tmp_regs[1] = TMP2;
  }
else
  {
  status.saved_tmp_regs[1] = RETURN_ADDR;
  if (sljit_get_register_index(RETURN_ADDR) == -1)
    status.tmp_regs[1] = STR_PTR;
  else
    status.tmp_regs[1] = RETURN_ADDR;
  }

status.saved_tmp_regs[2] = TMP3;
if (sljit_get_register_index(TMP3) == -1)
  status.tmp_regs[2] = STR_END;
else
  status.tmp_regs[2] = TMP3;

delayed_mem_copy_init(&status, common);

/* The first slot holds the recursive head; it is treated like a private
value, but the slot is consumed for every type. */
if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
  {
  SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);

  if (!from_sp)
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);

  if (from_sp || type == recurse_swap_global)
    delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
  }

stackptr += sizeof(sljit_sw);

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
/* Fixed second slot for the control head in the debug configuration. */
if (type != recurse_copy_shared_to_global)
  {
  if (!from_sp)
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);

  if (from_sp || type == recurse_swap_global)
    delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
  }

stackptr += sizeof(sljit_sw);
#endif

while (cc < ccend)
  {
  /* Values collected for the current opcode only. */
  private_count = 0;
  shared_count = 0;
  kept_shared_count = 0;

  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    if (has_quit && !setsom_found)
      {
      kept_shared_srcw[0] = OVECTOR(0);
      kept_shared_count = 1;
      setsom_found = TRUE;
      }
    cc += 1;
    break;

    case OP_RECURSE:
    /* A nested recursion may touch OVECTOR(0), the mark pointer and the
    last capture value, so each of them is collected here unless an earlier
    opcode already did so. */
    if (has_quit)
      {
      if (common->has_set_som && !setsom_found)
        {
        kept_shared_srcw[0] = OVECTOR(0);
        kept_shared_count = 1;
        setsom_found = TRUE;
        }
      if (common->mark_ptr != 0 && !setmark_found)
        {
        kept_shared_srcw[kept_shared_count] = common->mark_ptr;
        kept_shared_count++;
        setmark_found = TRUE;
        }
      }
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      shared_srcw[0] = common->capture_last_ptr;
      shared_count = 1;
      capture_last_found = TRUE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_KET:
    if (PRIVATE_DATA(cc) != 0)
      {
      private_count = 1;
      private_srcw[0] = PRIVATE_DATA(cc);
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      /* The private data entry of the next code unit is used as an extra
      skip distance. */
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    private_count = 1;
    private_srcw[0] = PRIVATE_DATA(cc);
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    /* Both ovector entries of the capture are shared values. */
    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
    shared_srcw[0] = OVECTOR(offset);
    shared_srcw[1] = OVECTOR(offset + 1);
    shared_count = 2;

    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      shared_srcw[2] = common->capture_last_ptr;
      shared_count = 3;
      capture_last_found = TRUE;
      }

    /* The private ovector copy is only collected when the bracket is not
    flagged in optimized_cbracket. */
    if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
      {
      private_count = 1;
      private_srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
      }
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
    shared_srcw[0] = OVECTOR(offset);
    shared_srcw[1] = OVECTOR(offset + 1);
    shared_count = 2;

    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      shared_srcw[2] = common->capture_last_ptr;
      shared_count = 3;
      capture_last_found = TRUE;
      }

    private_count = 2;
    private_srcw[0] = PRIVATE_DATA(cc);
    private_srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      private_count = 1;
      private_srcw[0] = PRIVATE_DATA(cc);
      }
    cc += 1 + LINK_SIZE;
    break;

    /* Character iterators: one or two consecutive private words, followed by
    the character (which may have extra code units in UTF mode). */
    CASE_ITERATOR_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc))
      {
      private_count = 1;
      private_srcw[0] = PRIVATE_DATA(cc);
      }
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc))
      {
      private_count = 2;
      private_srcw[0] = PRIVATE_DATA(cc);
      private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
      }
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc))
      {
      private_count = 2;
      private_srcw[0] = PRIVATE_DATA(cc);
      private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
      }
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    /* Type iterators: same private data layout; only the opcode itself (and
    the count for the 2B group) is skipped here. */
    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc))
      {
      private_count = 1;
      private_srcw[0] = PRIVATE_DATA(cc);
      }
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc))
      {
      private_count = 2;
      private_srcw[0] = PRIVATE_DATA(cc);
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      }
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc))
      {
      private_count = 2;
      private_srcw[0] = PRIVATE_DATA(cc);
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      }
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
    i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif
    /* i is the length of the class; the number of private words depends on
    the iterator which follows it. */
    if (PRIVATE_DATA(cc) != 0)
      switch(get_class_iterator_size(cc + i))
        {
        case 1:
        private_count = 1;
        private_srcw[0] = PRIVATE_DATA(cc);
        break;

        case 2:
        private_count = 2;
        private_srcw[0] = PRIVATE_DATA(cc);
        private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
        break;

        default:
        SLJIT_UNREACHABLE();
        break;
        }
    cc += i;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    if (has_quit && !setmark_found)
      {
      kept_shared_srcw[0] = common->mark_ptr;
      kept_shared_count = 1;
      setmark_found = TRUE;
      }
    if (common->control_head_ptr != 0 && !control_head_found)
      {
      shared_srcw[0] = common->control_head_ptr;
      shared_count = 1;
      control_head_found = TRUE;
      }
    /* Skip the opcode, two further code units and cc[1] more units. */
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    if (!control_head_found)
      {
      shared_srcw[0] = common->control_head_ptr;
      shared_count = 1;
      control_head_found = TRUE;
      }
    cc++;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Private values: moved for from_global, private_to_global and
  swap_global; otherwise their slots are skipped. */
  if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
    {
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);

    for (i = 0; i < private_count; i++)
      {
      SLJIT_ASSERT(private_srcw[i] != 0);

      if (!from_sp)
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);

      if (from_sp || type == recurse_swap_global)
        delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);

      stackptr += sizeof(sljit_sw);
      }
    }
  else
    stackptr += sizeof(sljit_sw) * private_count;

  /* Shared values: moved for from_global, shared_to_global and
  swap_global. */
  if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
    {
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);

    for (i = 0; i < shared_count; i++)
      {
      SLJIT_ASSERT(shared_srcw[i] != 0);

      if (!from_sp)
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);

      if (from_sp || type == recurse_swap_global)
        delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);

      stackptr += sizeof(sljit_sw);
      }
    }
  else
    stackptr += sizeof(sljit_sw) * shared_count;

  /* Kept shared values: moved for from_global, shared_to_global and
  kept_shared_to_global; never swapped. */
  if (type != recurse_copy_private_to_global && type != recurse_swap_global)
    {
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);

    for (i = 0; i < kept_shared_count; i++)
      {
      SLJIT_ASSERT(kept_shared_srcw[i] != 0);

      if (!from_sp)
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);

      if (from_sp || type == recurse_swap_global)
        delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);

      stackptr += sizeof(sljit_sw);
      }
    }
  else
    stackptr += sizeof(sljit_sw) * kept_shared_count;
  }

/* The scan must end exactly at ccend and must have consumed exactly the
slots between the stackptr and stacktop arguments. */
SLJIT_ASSERT(cc == ccend && stackptr == stacktop);

delayed_mem_copy_finish(&status);
}
2735
set_then_offsets(compiler_common * common,PCRE2_SPTR cc,sljit_u8 * current_offset)2736 static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
2737 {
2738 PCRE2_SPTR end = bracketend(cc);
2739 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2740
2741 /* Assert captures then. */
2742 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2743 current_offset = NULL;
2744 /* Conditional block does not. */
2745 if (*cc == OP_COND || *cc == OP_SCOND)
2746 has_alternatives = FALSE;
2747
2748 cc = next_opcode(common, cc);
2749 if (has_alternatives)
2750 current_offset = common->then_offsets + (cc - common->start);
2751
2752 while (cc < end)
2753 {
2754 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2755 cc = set_then_offsets(common, cc, current_offset);
2756 else
2757 {
2758 if (*cc == OP_ALT && has_alternatives)
2759 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2760 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2761 *current_offset = 1;
2762 cc = next_opcode(common, cc);
2763 }
2764 }
2765
2766 return end;
2767 }
2768
2769 #undef CASE_ITERATOR_PRIVATE_DATA_1
2770 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2771 #undef CASE_ITERATOR_PRIVATE_DATA_2B
2772 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2773 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2774 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2775
is_powerof2(unsigned int value)2776 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2777 {
2778 return (value & (value - 1)) == 0;
2779 }
2780
set_jumps(jump_list * list,struct sljit_label * label)2781 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2782 {
2783 while (list)
2784 {
2785 /* sljit_set_label is clever enough to do nothing
2786 if either the jump or the label is NULL. */
2787 SET_LABEL(list->jump, label);
2788 list = list->next;
2789 }
2790 }
2791
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)2792 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2793 {
2794 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2795 if (list_item)
2796 {
2797 list_item->next = *list;
2798 list_item->jump = jump;
2799 *list = list_item;
2800 }
2801 }
2802
/* Records a stub: "start" is the jump which enters the stub and a label
created at the current position is the place where the stub continues
afterwards. The record is pushed onto common->stubs and consumed by
flush_stubs(). Nothing is recorded (and no label is created) when the
allocation fails. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (stub == NULL)
  return;

stub->start = start;
stub->quit = LABEL();
stub->next = common->stubs;
common->stubs = stub;
}
2816
/* Emits the out-of-line code of every stub recorded by add_stub() and
empties the stub list. Each stub binds its entry jump to the current
position, performs a fast call whose target is resolved through
common->stackalloc, and then jumps back to the label saved in the stub. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
2831
/* Creates a label at the current position and remembers it together with
update_addr by pushing a new record onto common->label_addrs. Nothing is
recorded (and no label is created) when the allocation fails. */
static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
{
DEFINE_COMPILER;
label_addr_list *entry = sljit_alloc_memory(compiler, sizeof(label_addr_list));

if (entry != NULL)
  {
  entry->label = LABEL();
  entry->update_addr = update_addr;
  entry->next = common->label_addrs;
  common->label_addrs = entry;
  }
}
2845
count_match(compiler_common * common)2846 static SLJIT_INLINE void count_match(compiler_common *common)
2847 {
2848 DEFINE_COMPILER;
2849
2850 OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2851 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2852 }
2853
allocate_stack(compiler_common * common,int size)2854 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2855 {
2856 /* May destroy all locals and registers except TMP2. */
2857 DEFINE_COMPILER;
2858
2859 SLJIT_ASSERT(size > 0);
2860 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2861 #ifdef DESTROY_REGISTERS
2862 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2863 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2864 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2865 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2866 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2867 #endif
2868 add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
2869 }
2870
free_stack(compiler_common * common,int size)2871 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2872 {
2873 DEFINE_COMPILER;
2874
2875 SLJIT_ASSERT(size > 0);
2876 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2877 }
2878
/* Allocates a block of "size" bytes preceded by one hidden sljit_uw sized
header. The header stores the previous value of common->read_only_data_head,
so all blocks form a singly linked list whose head is updated here.

Returns:  pointer to the usable area (just after the header), or NULL when
          the compiler is already in an error state or the allocation fails;
          in the latter case the compiler memory error is set as well */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *block;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

block = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
if (SLJIT_UNLIKELY(block == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

/* Link the new block in front of the existing ones. */
*(void**)block = common->read_only_data_head;
common->read_only_data_head = (void *)block;

return &block[1];
}
2898
reset_ovector(compiler_common * common,int length)2899 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2900 {
2901 DEFINE_COMPILER;
2902 struct sljit_label *loop;
2903 sljit_s32 i;
2904
2905 /* At this point we can freely use all temporary registers. */
2906 SLJIT_ASSERT(length > 1);
2907 /* TMP1 returns with begin - 1. */
2908 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2909 if (length < 8)
2910 {
2911 for (i = 1; i < length; i++)
2912 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2913 }
2914 else
2915 {
2916 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
2917 {
2918 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2919 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2920 loop = LABEL();
2921 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
2922 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2923 JUMPTO(SLJIT_NOT_ZERO, loop);
2924 }
2925 else
2926 {
2927 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
2928 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2929 loop = LABEL();
2930 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
2931 OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
2932 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2933 JUMPTO(SLJIT_NOT_ZERO, loop);
2934 }
2935 }
2936 }
2937
reset_fast_fail(compiler_common * common)2938 static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
2939 {
2940 DEFINE_COMPILER;
2941 sljit_s32 i;
2942
2943 SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
2944
2945 OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2946 for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
2947 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
2948 }
2949
do_reset_match(compiler_common * common,int length)2950 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2951 {
2952 DEFINE_COMPILER;
2953 struct sljit_label *loop;
2954 int i;
2955
2956 SLJIT_ASSERT(length > 1);
2957 /* OVECTOR(1) contains the "string begin - 1" constant. */
2958 if (length > 2)
2959 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2960 if (length < 8)
2961 {
2962 for (i = 2; i < length; i++)
2963 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2964 }
2965 else
2966 {
2967 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
2968 {
2969 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2970 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2971 loop = LABEL();
2972 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
2973 OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2974 JUMPTO(SLJIT_NOT_ZERO, loop);
2975 }
2976 else
2977 {
2978 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
2979 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2980 loop = LABEL();
2981 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
2982 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
2983 OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2984 JUMPTO(SLJIT_NOT_ZERO, loop);
2985 }
2986 }
2987
2988 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2989 if (common->mark_ptr != 0)
2990 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2991 if (common->control_head_ptr != 0)
2992 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2993 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2994 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2995 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
2996 }
2997
do_search_mark(sljit_sw * current,PCRE2_SPTR skip_arg)2998 static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
2999 {
3000 while (current != NULL)
3001 {
3002 switch (current[1])
3003 {
3004 case type_then_trap:
3005 break;
3006
3007 case type_mark:
3008 if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
3009 return current[3];
3010 break;
3011
3012 default:
3013 SLJIT_UNREACHABLE();
3014 break;
3015 }
3016 SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
3017 current = (sljit_sw*)current[0];
3018 }
3019 return 0;
3020 }
3021
/* Emits the code which publishes a successful match: the internal ovector
(pointers kept in SLJIT_SP relative locals) is converted into code unit
offsets relative to the "begin" field of the arguments and written to the
ovector of the match data, startchar_ptr (and mark_ptr, if marks are used)
are stored in the arguments, and SLJIT_RETURN_REG receives the return value.

Arguments:
  common      compiler state
  topbracket  highest capture bracket that is considered when the return
              value is computed; with topbracket <= 1 the return value is
              the constant 1 */
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
DEFINE_COMPILER;
struct sljit_label *loop;
BOOL has_pre;

/* At this point we can freely use all registers. */
/* Keep the previous content of OVECTOR(1) in S2 (it is compared against the
other entries at the end) and store the current STR_PTR there.
NOTE(review): the previous content is presumably the "unset" marker which
reset_ovector() writes - confirm. */
OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);

/* R0 = arguments, R1 = number of entries to copy (oveccount),
R2 = address one PCRE2_SIZE before the output ovector. */
OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
  SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));

/* Query only (SLJIT_MEM_SUPP): is a load with pre-update available? */
has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;

/* S0 = source pointer (one word earlier for the pre-update form),
R0 = begin of the subject. */
GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));

/* Copy loop: one internal entry per iteration, R1 iterations. */
loop = LABEL();

if (has_pre)
  sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
else
  {
  OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
  OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
  }

/* Advance the destination, then turn the pointer into an offset. */
OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
/* Copy the integer value to the output buffer */
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
/* Byte difference -> code unit count. */
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif

SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);

OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);

/* Calculate the return value, which is the maximum ovector value. */
/* The scan walks downwards over every second internal entry (a step of two
words), starting with the one that belongs to topbracket, and decrements R1
(initially topbracket + 1) for every entry visited. It stops at the first
entry that differs from S2, and the remaining count is returned. */
if (topbracket > 1)
  {
  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
    {
    /* Pre-update variant: start one step above the first entry. */
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    loop = LABEL();
    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
    }
  else
    {
    /* Plain variant: load, then step the pointer down explicitly. */
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    loop = LABEL();
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
    OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
    }
  }
else
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
}
3103
return_with_partial_match(compiler_common * common,struct sljit_label * quit)3104 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
3105 {
3106 DEFINE_COMPILER;
3107 sljit_s32 mov_opcode;
3108
3109 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
3110 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
3111 && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));
3112
3113 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
3114 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
3115 common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
3116 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);
3117
3118 /* Store match begin and end. */
3119 OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
3120 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
3121 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, match_data));
3122
3123 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
3124
3125 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
3126 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3127 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
3128 #endif
3129 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);
3130
3131 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
3132 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3133 OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
3134 #endif
3135 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);
3136
3137 JUMPTO(SLJIT_JUMP, quit);
3138 }
3139
check_start_used_ptr(compiler_common * common)3140 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
3141 {
3142 /* May destroy TMP1. */
3143 DEFINE_COMPILER;
3144 struct sljit_jump *jump;
3145
3146 if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3147 {
3148 /* The value of -1 must be kept for start_used_ptr! */
3149 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
3150 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
3151 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
3152 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
3153 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3154 JUMPHERE(jump);
3155 }
3156 else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
3157 {
3158 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3159 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3160 JUMPHERE(jump);
3161 }
3162 }
3163
char_has_othercase(compiler_common * common,PCRE2_SPTR cc)3164 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
3165 {
3166 /* Detects if the character has an othercase. */
3167 unsigned int c;
3168
3169 #ifdef SUPPORT_UNICODE
3170 if (common->utf)
3171 {
3172 GETCHAR(c, cc);
3173 if (c > 127)
3174 {
3175 return c != UCD_OTHERCASE(c);
3176 }
3177 #if PCRE2_CODE_UNIT_WIDTH != 8
3178 return common->fcc[c] != c;
3179 #endif
3180 }
3181 else
3182 #endif
3183 c = *cc;
3184 return MAX_255(c) ? common->fcc[c] != c : FALSE;
3185 }
3186
char_othercase(compiler_common * common,unsigned int c)3187 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
3188 {
3189 /* Returns with the othercase. */
3190 #ifdef SUPPORT_UNICODE
3191 if (common->utf && c > 127)
3192 {
3193 return UCD_OTHERCASE(c);
3194 }
3195 #endif
3196 return TABLE_GET(c, common->fcc, c);
3197 }
3198
/* Computes a compact description of the difference between the character at
cc and its othercase, for characters whose two cases differ in exactly one
bit. The caller must only pass characters which have an othercase (c != oc
is asserted).

Returns:  0 when the two cases differ in more than one bit; otherwise
          (index << 8) | mask, where mask is the differing bit reduced to
          fit into the low 8 bits of the result. Since the two cases always
          differ, a successful result is never 0.

NOTE(review): index presumably selects the byte, counted from the start of
the encoded character, which contains the differing bit - confirm against
the callers. */
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
/* Detects if the character and its othercase has only 1 bit difference. */
unsigned int c, oc, bit;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int n;
#endif

/* Fetch the character and its othercase: the fcc table is used for
c <= 127 in UTF mode and through TABLE_GET otherwise, UCD_OTHERCASE for the
remaining UTF characters. */
#ifdef SUPPORT_UNICODE
if (common->utf)
  {
  GETCHAR(c, cc);
  if (c <= 127)
    oc = common->fcc[c];
  else
    {
    oc = UCD_OTHERCASE(c);
    }
  }
else
  {
  c = *cc;
  oc = TABLE_GET(c, common->fcc, c);
  }
#else
c = *cc;
oc = TABLE_GET(c, common->fcc, c);
#endif

SLJIT_ASSERT(c != oc);

bit = c ^ oc;
/* Optimized for English alphabet. */
if (c <= 127 && bit == 0x20)
  return (0 << 8) | 0x20;

/* Since c != oc, they must have at least 1 bit difference. */
if (!is_powerof2(bit))
  return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
if (common->utf && c > 127)
  {
  /* Start at the last code unit of the sequence and move one unit towards
  the front for every group of 6 bits which does not contain the bit. */
  n = GET_EXTRALEN(*cc);
  while ((bit & 0x3f) == 0)
    {
    n--;
    bit >>= 6;
    }
  return (n << 8) | bit;
  }
#endif /* SUPPORT_UNICODE */
return (0 << 8) | bit;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
if (common->utf && c > 65535)
  {
  /* A difference at bit 10 or above is shifted down and handled by the
  common return below (index 0 or 1); a difference in the low 10 bits
  yields index 2 or 3. */
  if (bit >= (1u << 10))
    bit >>= 10;
  else
    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
  }
#endif /* SUPPORT_UNICODE */
/* Index 0 when the bit is within the low 8 bits, index 1 otherwise. */
return (bit < 256) ? ((0u << 8) | bit) : ((1u << 8) | (bit >> 8));

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
3270
static void check_partial(compiler_common *common, BOOL force)
{
/* Emits the code that reacts to a partial match. Nothing is emitted in
PCRE2_JIT_COMPLETE mode. In soft partial mode the hit_start slot is set to
zero; in the remaining mode control is transferred to the partial match
exit. Unless force is set, the reaction is skipped when the saved
start_used_ptr value is >= STR_PTR; with force in soft mode it is skipped
when that value is -1. Does not modify registers. */
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

if (force)
  {
  if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
    skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  }
else
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

if (common->mode != PCRE2_JIT_PARTIAL_SOFT)
  {
  /* Use the label directly if it already exists, otherwise queue the jump. */
  if (common->partialmatchlabel == NULL)
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  else
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
else
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);

if (skip != NULL)
  JUMPHERE(skip);
}
3300
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Emits an end-of-subject test whose "end reached" exits are collected in
end_reached. In the partial matching modes the partial match handling is
emitted as well. Does not affect registers. Usually used in a tight spot. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

/* At the end of the subject: this is a plain "end reached" when the saved
start_used_ptr value is >= STR_PTR. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode != PCRE2_JIT_PARTIAL_SOFT)
  {
  if (common->partialmatchlabel == NULL)
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  else
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
else
  {
  /* Soft partial: clear hit_start, then continue as "end reached". */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }

JUMPHERE(not_at_end);
}
3330
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
/* Emits an end-of-subject test whose failure exits are collected in
backtracks. In the partial matching modes it also emits the partial match
handling for the case when the end of the subject has been reached. */
DEFINE_COMPILER;
struct sljit_jump *has_input;

if (common->mode != PCRE2_JIT_COMPLETE)
  {
  /* Partial matching mode. */
  has_input = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

  if (common->mode != PCRE2_JIT_PARTIAL_SOFT)
    {
    if (common->partialmatchlabel == NULL)
      add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
    else
      JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
    }
  else
    {
    /* Soft partial: clear hit_start, then backtrack. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    }

  JUMPHERE(has_input);
  return;
  }

add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
}
3359
static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
{
/* Loads the character at STR_PTR into TMP1 and leaves STR_PTR where it was.
STR_END is not checked. TMP2, dst and RETURN_ADDR are destroyed.

max lets the multi-unit decoding be omitted when the caller is only
interested in characters that fit into a single code unit. dst/dstw name
the location used to save STR_PTR around the decoder helper call. If
backtracks is not NULL and invalid UTF checking is enabled, an invalid
character jumps to backtracks. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *single_unit;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (!common->utf || max < 128)
  return;

single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);

/* The helper moves STR_PTR, so save it in dst and restore it afterwards. */
OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
if (common->invalid_utf)
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
else
  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);

if (common->invalid_utf && backtracks != NULL)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (!common->utf || max < 0xd800)
  return;

OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

if (!common->invalid_utf)
  {
  /* TMP2 contains the high surrogate. */
  single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  }
else
  {
  /* Any surrogate (0xd800 - 0xdfff) is passed to the validating helper,
  with STR_PTR saved in dst across the call. */
  single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  }

JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (!common->invalid_utf)
  return;

if (backtracks == NULL)
  {
  /* No backtrack list: replace an out of range value by INVALID_UTF_CHAR. */
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  }
else
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3434
static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
{
/* Reads one character back without moving STR_PTR. TMP2 must
contain the start of the subject buffer. Affects TMP1, TMP2, and RETURN_ADDR.

max lets the multi-unit decoding be omitted when the caller is only
interested in characters that fit into a single code unit.

backtracks may be NULL. When it is not NULL and invalid UTF checking is
enabled, an invalid character jumps to backtracks. When it is NULL, no jump
is emitted: in 8 and 16 bit mode TMP1 is left as returned by the helper, and
in 32 bit mode an out of range value is replaced by INVALID_UTF_CHAR (the
same fallback peek_char uses), which also changes the status flags. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  /* A byte below 0x80 is a complete character. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  if (common->invalid_utf)
    {
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  if (common->invalid_utf)
    {
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
    /* TMP2 contains the low surrogate. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  /* backtracks is optional here just like in the 8 and 16 bit code above:
  never hand a NULL list to add_jump. */
  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  else
    {
    OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
    CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3498
/* Option bits accepted by read_char() in its options argument. */

/* Move STR_PTR past the whole character even on the code paths where the
character value itself is not decoded. */
#define READ_CHAR_UPDATE_STR_PTR 0x1
/* When invalid UTF checking is active, call the utfreadnewline_invalid
helper instead of utfreadchar_invalid. */
#define READ_CHAR_UTF8_NEWLINE 0x2
/* Both of the above. */
#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
/* Skip the invalid UTF checking code path even if common->invalid_utf is set. */
#define READ_CHAR_VALID_UTF 0x4
3503
static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
  jump_list **backtracks, sljit_u32 options)
{
/* Reads the precise value of a character into TMP1, if the character is
between min and max (c >= min && c <= max). Otherwise it returns with a value
outside the range. Does not check STR_END.

STR_PTR is always moved past the first code unit. options is a combination
of the READ_CHAR_* bits:
  READ_CHAR_UPDATE_STR_PTR  STR_PTR is moved past the whole character even
                            when the value is not decoded.
  READ_CHAR_UTF8_NEWLINE    with invalid UTF checking, the
                            utfreadnewline_invalid helper is called.
  READ_CHAR_VALID_UTF       the invalid UTF checking path is not used.
backtracks may be NULL; if it is not, an invalid character jumps to it when
invalid UTF checking is active. TMP2 and RETURN_ADDR may be destroyed. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(options);
SLJIT_ASSERT(min <= max);

/* Load the first code unit and step over it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* Only single byte values are of interest and STR_PTR does not need to
  be moved past the rest of the character: nothing more to do. */
  if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;

  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
    {
    /* Bytes below 0x80 are complete; everything else is decoded and
    validated by a helper. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);

    if (options & READ_CHAR_UTF8_NEWLINE)
      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
    else
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));

    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    JUMPHERE(jump);
    return;
    }

  /* Values below 0xc0 are returned as they were read. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  if (min >= 0x10000)
    {
    /* Only four byte sequences can be in range. TMP2 is the lead byte minus
    0xf0; if it is above 7 the decoding is skipped and TMP1 is left with the
    second code unit, which is below min. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
    /* RETURN_ADDR receives the utf8_table4 entry of the lead byte, which is
    added to STR_PTR below (presumably the number of trailing bytes - confirm
    against the definition of utf8_table4). */
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
    /* Merge the low 6 bits of each of the three trailing bytes. */
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    /* With UPDATE_STR_PTR the pointer is advanced on both paths. */
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (min >= 0x800 && max <= 0xffff)
    {
    /* Only three byte sequences can be in range. TMP2 is the lead byte
    minus 0xe0; if it is above 0xf the decoding is skipped. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (max >= 0x800)
    {
    /* The range spans several sequence lengths: use the generic decoder. */
    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
    }
  else if (max < 128)
    {
    /* The value is not needed (this point is only reached with
    READ_CHAR_UPDATE_STR_PTR set): just add the utf8_table4 entry of the
    lead byte to STR_PTR. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  else
    {
    /* max is below 0x800: the lead byte and the next byte are combined as a
    two byte sequence. The 0x3f mask leaves bit 5 of the lead byte in place,
    so the result for a longer sequence is at least 0x800. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    else
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  /* Values below 0xd800 are always a single code unit. */
  if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;

  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
    {
    /* Any surrogate (0xd800 - 0xdfff) is handled by a validating helper. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);

    if (options & READ_CHAR_UTF8_NEWLINE)
      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
    else
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));

    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    JUMPHERE(jump);
    return;
    }

  if (max >= 0x10000)
    {
    /* Characters above 0xffff are wanted: combine the surrogate pair. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
    /* TMP2 contains the high surrogate. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump);
    return;
    }

  /* Skip low surrogate if necessary. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  /* Branch free variant, used when conditional moves are available and
  RETURN_ADDR has a register index. For a high surrogate (TMP2 < 0x400)
  STR_PTR is moved one more unit if requested, and TMP1 is replaced by
  0x10000 when max >= 0xd800. */
  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && sljit_get_register_index(RETURN_ADDR) >= 0)
    {
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
    if (max >= 0xd800)
      CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000);
    }
  else
    {
    /* Same effect using a conditional jump. */
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    if (max >= 0xd800)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
    JUMPHERE(jump);
    }
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  /* Values from 0x110000 are invalid: either backtrack, or replace the
  value by INVALID_UTF_CHAR when there is no backtrack list. */
  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  else
    {
    OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
    CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3690
3691 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
3692
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Tells whether the character codes below 128 are enough to determine a
match: this is the case when the upper half of the 32 byte bitmap (bytes
16 to 31) is uniformly 0xff for a negated class or uniformly zero for a
normal class. */
const sljit_u8 expected = nclass ? 0xff : 0;
int i;

for (i = 16; i < 32; i++)
  {
  if (bitset[i] != expected)
    return FALSE;
  }
return TRUE;
}
3709
static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the precise character type of a character into TMP1, if the character
is less than 128. Otherwise it returns with zero. Does not check STR_END.
Only valid in UTF mode. TMP2 is destroyed.

If negated is FALSE only the first code unit is consumed. If negated is TRUE,
STR_PTR is moved past the whole multi-byte character; with invalid UTF
checking enabled the character is validated as well, and an invalid one
jumps to backtracks (which must not be NULL in that case). */
DEFINE_COMPILER;
struct sljit_jump *jump;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* All values > 127 are zero in ctypes. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (negated)
  {
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (common->invalid_utf)
    {
    /* The decoder helper takes the first byte in TMP1, but TMP1 holds the
    character type at this point; the byte itself is in TMP2. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    /* The type of every character above 127 is zero. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    }
  else
    {
    /* Add the utf8_table4 entry of the first byte to STR_PTR to step over
    the rest of the character. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  JUMPHERE(jump);
  }
}
3744
3745 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
3746
static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END.
The ctypes table only covers values up to 255; the type of any larger
character is zero. TMP2 is destroyed.

negated tells that the caller needs STR_PTR to be moved past the whole
character even when its type is zero. When invalid UTF checking is enabled,
invalid sequences jump to backtracks on the code paths that validate the
input (backtracks must not be NULL there). */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(negated);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The result of this read may be unused, but saves an "else" part. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (!negated)
    {
    /* Only two byte sequences can encode a character below 256, so the
    next byte is combined with the first one directly. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
    /* Accept only the first bytes 0xc2 - 0xdf. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));

    /* For a second byte in the 0x80 - 0xbf range, TMP2 becomes the
    character value (0xc2 0x80 gives 0x80). */
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
    /* The second byte must be in the 0x80 - 0xbf range: the check has to be
    done on TMP1 (second byte minus 0x80). TMP2 is at least 0x80 for every
    well-formed sequence, so testing it would reject all of them. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else if (common->invalid_utf)
    {
    /* The decoder helper takes the first byte in TMP1, but TMP1 holds the
    character type at this point; the byte itself is in TMP2. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && negated)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && negated)
  {
  /* Skip low surrogate if necessary. */
  if (!common->invalid_utf)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);

    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && sljit_get_register_index(RETURN_ADDR) >= 0)
      {
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
      CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
      }
    else
      {
      jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      JUMPHERE(jump);
      }
    return;
    }

  /* Invalid UTF checking: a non-surrogate needs nothing more. A surrogate
  must be a high one, must not be the last code unit of the subject, and
  must be followed by a low surrogate. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
3865
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
{
/* Goes one character back. TMP2 must contain the start of
the subject buffer. Affects STR_PTR and TMP1. Does not modify
STR_PTR for invalid character sequences.

The validating code is emitted only when invalid UTF checking is enabled and
must_be_valid is FALSE. backtracks may be NULL; if it is not, an invalid
sequence jumps to it. */
DEFINE_COMPILER;
/* All declarations are kept ahead of the first statement so that the
function also compiles with compilers that reject mixed declarations. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *label;
#endif

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(must_be_valid);

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (!must_be_valid && common->invalid_utf)
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    /* A byte below 0x80 is a complete character; anything else is handled
    by the helper, and TMP1 == 0 afterwards is treated as failure. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(jump);
    return;
    }

  /* Step back until the byte stepped over is not of the 10xxxxxx form. */
  label = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (!must_be_valid && common->invalid_utf)
    {
    /* Only surrogates (0xd800 - 0xdfff) need the helper. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(jump);
    return;
    }

  /* Skip low surrogate if necessary. TMP1 becomes 1 for a low surrogate
  and 0 otherwise, and is scaled to one code unit before the subtraction. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && !must_be_valid)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  if (backtracks != NULL)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    return;
    }

  /* No backtrack list: step back only if the value is below 0x110000.
  TMP1 becomes 1 in that case and 0 otherwise. */
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
3951
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Tests whether the character in TMP1 is a newline of kind nltype. The
jumps added to backtracks are taken when the character is a newline if
jumpifmatch is set, and when it is not a newline otherwise. TMP2 may be
destroyed. */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The anynewline helper reports its result in the zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  /* A fixed, single code unit newline. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: either CR or NL counts as a newline. */
if (!jumpifmatch)
  {
  is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  JUMPHERE(is_cr);
  return;
  }

add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
}
3984
3985 #ifdef SUPPORT_UNICODE
3986
3987 #if PCRE2_CODE_UNIT_WIDTH == 8
static void do_utfreadchar(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1.

This emits a helper that is entered by a fast call and returns through
RETURN_ADDR. On entry STR_PTR points to the second byte of the character; on
return it points after the last one. No validation is done. TMP2 is
destroyed. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Append the low 6 bits of the second byte to the first byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. */
/* 0x800 is bit 5 of the first byte (0x20 << 6). */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
/* 0x3000 is the 0xc0 marker of the first byte after the shift. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Append the low 6 bits of the third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* 0x10000 is bit 4 of the first byte (0x10 << 12). */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. */
/* 0xe0000 is the 0xe0 marker of the first byte after two shifts. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Four byte sequence. */
JUMPHERE(jump);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
/* Remove the 0xf0 marker before the last shift. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4032
static void do_utfreadtype8(compiler_common *common)
{
/* Fast decoding a UTF-8 character type. TMP2 contains the first byte
of the character (>= 0xc0). Return value in TMP1.

This emits a helper that is entered by a fast call and returns through
RETURN_ADDR. On entry STR_PTR points to the second byte of the character.
The type is read from ctypes when the character value is below 256 and is
zero otherwise. STR_PTR is moved past the character. No validation is done.
TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *compare;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Bit 5 of the first byte is clear only for two byte sequences. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point. */
/* If they are above 3, the character value is 256 or more. */
compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Two byte sequence whose value is not covered by ctypes: type is zero. */
JUMPHERE(compare);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* We only have types for characters less than 256. */
JUMPHERE(jump);
/* Longer sequence: add the utf8_table4 entry of the first byte to STR_PTR
and return zero as the type. */
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4068
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Slow decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1. STR_PTR is
undefined for invalid characters.

This variant validates while it decodes: every malformed case detected
below (bad lead byte, missing or bad continuation byte, overlong
three/four-byte form, surrogate, value above 0x10ffff) returns
INVALID_UTF_CHAR in TMP1. The continuation bytes are read starting at the
entry value of STR_PTR, so the caller is expected to have stepped past the
lead byte already. TMP2 is used as scratch. */
DEFINE_COMPILER;
sljit_s32 i;
/* When conditional moves are available, several checks below load a poison
value into TMP1 instead of branching; the matching exit_invalid[] slot is
then set to NULL. */
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *jump;
struct sljit_jump *buffer_end_close;
struct sljit_label *three_byte_entry;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[11];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Bias the lead byte so that a single range check rejects 0xc0/0xc1 (they
wrap around below zero) together with everything >= 0xf5. The compare is
unsigned in sljit. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);

/* Usually more than 3 characters remained in the subject buffer. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));

/* Not a valid start of a multi-byte sequence, no more bytes read. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);

/* Fewer than three code units follow the lead byte: use the bounds-checked
path at the end of this function. */
buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6).
That cancels the 0xc2 bias, leaving ((lead - 0xc0) << 6) + payload. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
/* Not a continuation byte. */
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* Bit 0x800 is bit 5 of (lead - 0xc0): set only for leads >= 0xe0. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);

/* Two-byte sequence: give back the two speculatively consumed units. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  /* Bad third byte: 0x20000 has bit 0x10000 clear, so it takes the
  three-byte path below, is reduced to 0 there and fails the final
  "< 0x800" check. */
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000);
  exit_invalid[2] = NULL;
  }
else
  exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* Bit 0x10000 is bit 4 of (lead - 0xc0): set only for leads >= 0xf0. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);

/* Also entered from the near-end-of-buffer path below, with STR_PTR one
unit further than the end of the character. */
three_byte_entry = LABEL();

/* TMP1 is 0x20000 + code point here. Removing 0x20000 + 0xd800 maps the
surrogate range onto 0..0x7ff. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  /* Becomes INVALID_UTF_CHAR once 0xd800 is added back. */
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800);
  exit_invalid[3] = NULL;
  }
else
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Values below 0x800 are overlong in three bytes. */
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  exit_invalid[4] = NULL;
  }
else
  exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  /* Bad fourth byte: 0 wraps around in the subtraction below and is
  rejected by the range check that follows it. */
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0);
  exit_invalid[5] = NULL;
  }
else
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* TMP1 is 0xc00000 + code point here. After removing 0xc00000 + 0x10000
the valid range 0x10000..0x10ffff becomes 0..0xfffff; overlong forms wrap
around and values that are too large stay >= 0x100000. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
  /* Becomes INVALID_UTF_CHAR once 0x10000 is added back. */
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  exit_invalid[6] = NULL;
  }
else
  exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Near the end of the subject: at most two code units follow the lead
byte, so each read is bounds checked. STR_PTR is rewound to one unit past
its entry value. */
JUMPHERE(buffer_end_close);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
/* Nothing follows the lead byte. */
exit_invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);

/* STR_PTR already points just past the second byte. */
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(jump);
/* The third byte is not available. */
exit_invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  /* NOTE(review): this relies on INVALID_UTF_CHAR being >= 0x30000 so
  that the compare below does not branch to three_byte_entry - confirm
  against its definition. */
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  exit_invalid[10] = NULL;
  }
else
  exit_invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* One will be subtracted from STR_PTR later. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Four byte sequences are not possible. */
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_entry);

/* Common failure exit. Slots that were replaced by a conditional move are
NULL; sljit_set_label() is called for them as well (presumably a no-op for
a NULL jump). */
exit_invalid_label = LABEL();
for (i = 0; i < 11; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4227
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Slow decoding a UTF-8 character, specialized for newlines.
TMP1 contains the first byte of the character (>= 0xc0). Return
char value in TMP1.

Only sequences that can be a newline are decoded: 0xc2 0x85 yields 0x85,
and 0xe2 0x80 followed by a continuation byte yields 0x2000 | (low six
bits), which covers 0x2028 and 0x2029. For anything else the continuation
bytes that follow are skipped and INVALID_UTF_CHAR is returned. When the
newline convention is not NLTYPE_ANY no multi-byte newline exists, so the
emitted code only skips continuation bytes. TMP2 is used as scratch. */
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_label *skip_start;
struct sljit_label *three_byte_exit;
struct sljit_jump *jump[5];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

if (common->nltype != NLTYPE_ANY)
  {
  SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);

  /* All newlines are ascii, just skip intermediate octets. The loop has
  the same shape as the skip loop of the NLTYPE_ANY case below: check the
  end of the subject, load, advance, test. Without the advance the loop
  would re-read the same code unit forever, and the decrement after the
  loop would move STR_PTR backwards. */
  loop = LABEL();
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
  /* The last code unit read was not a continuation byte: step back. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  JUMPHERE(jump[0]);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
  return;
  }

/* Read the second code unit, if there is one. */
jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Only these two lead bytes can start a multi-byte newline. */
jump[1] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
jump[2] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);

/* Not a newline: TMP2 holds the code unit read last. If it is not a
continuation byte, un-read it; otherwise skip the rest of the sequence. */
skip_start = LABEL();
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
jump[3] = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);

/* Skip intermediate octets. */
loop = LABEL();
jump[4] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);

JUMPHERE(jump[3]);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Exits that reached the end of the subject land here, without the
step back above. */
three_byte_exit = LABEL();
JUMPHERE(jump[0]);
JUMPHERE(jump[4]);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Two byte long newline: 0x85. */
JUMPHERE(jump[1]);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Three byte long newlines: 0x2028 and 0x2029. */
JUMPHERE(jump[2]);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start);
CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The third code unit must be a continuation byte; TMP2 keeps the raw
value for skip_start. */
OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);

OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4311
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Goes one character back.

Looks for the lead byte of a multi-byte UTF-8 character in the (up to)
three code units before the entry value of STR_PTR. On success TMP1 is 1
and STR_PTR points to the lead byte; on failure TMP1 is 0 and STR_PTR is
one code unit past its entry value. Only the byte classes (lead byte versus
continuation byte) are checked here, not the decoded value.

TMP2 is compared against STR_PTR as the lowest address that may be read
(presumably the start of the subject). TMP1 presumably holds the code unit
at the entry value of STR_PTR, already loaded by the caller - confirm
against the call site. */
DEFINE_COMPILER;
sljit_s32 i;
struct sljit_jump *jump;
struct sljit_jump *buffer_start_close;
struct sljit_label *exit_ok_label;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[7];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Speculatively step back three code units; the success paths add back
whatever was too much. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
/* TMP1 is not a continuation byte. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);

/* Two-byte sequence. */
/* Fewer than three code units are available: use the bounds-checked path
further down. */
buffer_start_close = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));

/* A lead byte in 0xc0-0xdf maps to 0..0x1f. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(jump);
/* The byte just examined must be a continuation byte: after the 0xc0 bias
those are -0x40..-1, i.e. the only values not below -0x40 in the unsigned
compare. */
exit_invalid[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));

/* A lead byte in 0xe0-0xef maps to 0..0xf. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Four-byte sequence. */
JUMPHERE(jump);
/* Re-bias to 0x80: the byte just examined must be a continuation byte. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);

/* The lead byte must be in 0xf0-0xf4. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);

/* Shared success exit; STR_PTR already points to the lead byte. */
exit_ok_label = LABEL();
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Two-byte sequence. */
/* Close to the lower bound: STR_PTR is moved to one unit before its entry
value and every further step back is bounds checked. */
JUMPHERE(buffer_start_close);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

exit_invalid[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, exit_ok_label);

/* Three-byte sequence. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
exit_invalid[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
exit_invalid[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, exit_ok_label);

/* Four-byte sequences are not possible. */

/* Failure exit for STR_PTR two units before its entry value (also reached
by falling through from the compare above). */
exit_invalid_label = LABEL();
sljit_set_label(exit_invalid[5], exit_invalid_label);
sljit_set_label(exit_invalid[6], exit_invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* exit_invalid[4] arrives with STR_PTR one unit before its entry value;
bring it to three units before so that it can share the exit below. */
JUMPHERE(exit_invalid[4]);
/* -2 + 4 = 2 */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Failure exit for STR_PTR three units before its entry value. */
exit_invalid_label = LABEL();
for (i = 0; i < 4; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4407
static void do_utfpeakcharback(compiler_common *common)
{
/* Peek at the multi-byte UTF-8 character that ends just before STR_PTR and
return its value in TMP1. No validity or bounds checks are emitted. STR_PTR
is left untouched; TMP2 is used as scratch. The three decoding stages fall
through into each other, so a longer sequence passes through every stage
below its entry point. */
DEFINE_COMPILER;
struct sljit_jump *lead_is_two_byte;
struct sljit_jump *lead_is_three_byte;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Is the code unit at -2 a two-byte lead (0xc0-0xdf)? */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
lead_is_two_byte = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);

/* Is the code unit at -3 a three-byte lead (0xe0-0xef)? */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
lead_is_three_byte = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);

/* Four-byte character: the unit at -3 is a continuation byte (re-bias it
to 0x80) and the lead byte is at -4. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the payload of the continuation byte at -2. */
JUMPHERE(lead_is_three_byte);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the payload of the last byte, at -1. */
JUMPHERE(lead_is_two_byte);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4444
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Peek a character back.

Decodes the multi-byte UTF-8 character that ends just before STR_PTR and
returns its value in TMP1, or INVALID_UTF_CHAR if no well-formed character
ends there. STR_PTR is not modified.

On entry TMP1 is treated as the last byte of the character; the two-byte
arithmetic below uses its raw value, so it is presumably the code unit at
STR_PTR[-1] loaded by the caller - confirm against the call site. TMP2 is
compared against STR_PTR as the lowest address that may be read (presumably
the start of the subject) and is then reused as scratch. */
DEFINE_COMPILER;
sljit_s32 i;
/* When conditional moves are available, several checks below load a poison
value into TMP1 instead of branching; the matching exit_invalid[] slot is
then set to NULL. */
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *jump[2];
struct sljit_label *two_byte_entry;
struct sljit_label *three_byte_entry;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[8];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
/* The last byte is not a continuation byte. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
/* Fewer than four code units are available before STR_PTR: use the
bounds-aware paths at the end of this function. */
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
/* A lead byte in 0xc2-0xdf maps to 0..0x1d; 0xc0 and 0xc1 (overlong) wrap
around and are not accepted as lead bytes. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);

/* Re-entered from the near-start paths below. */
two_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
/* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6).
That cancels the 0xc2 bias of the lead byte. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* The code unit at -2 is not a two-byte lead: it must be a continuation
byte. Re-bias it to 0x80 and strip the marker bits of the last byte. */
JUMPHERE(jump[1]);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);

/* Re-entered from the near-start path below. */
three_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Reject surrogates: 0xd800-0xdfff maps to 0..0x7ff. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
  {
  /* Becomes 0 once 0xd800 is added back, which then fails the overlong
  check below. */
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800);
  exit_invalid[2] = NULL;
  }
else
  exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
/* Values below 0x800 are overlong in three bytes. */
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  exit_invalid[3] = NULL;
  }
else
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* The code unit at -3 is not a three-byte lead: it must be a continuation
byte. */
JUMPHERE(jump[1]);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
/* ADD is used instead of OR because of the SUB 0x10000 above. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

/* TMP1 is the code point minus 0x10000 here; anything outside 0..0xfffff
is overlong, too large, or had a bad lead byte. */
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
  /* Becomes INVALID_UTF_CHAR once 0x10000 is added back. */
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  exit_invalid[5] = NULL;
  }
else
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Fewer than four code units are available. TMP2 becomes lower bound + 2,
so the compare below separates "exactly three available" from fewer. */
JUMPHERE(jump[0]);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);

/* A four-byte sequence does not fit into three code units. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Fewer than three code units are available; fail if there are not even
two. */
JUMPHERE(jump[0]);
exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

/* Common failure exit. Slots that were replaced by a conditional move are
NULL; sljit_set_label() is called for them as well (presumably a no-op for
a NULL jump). */
exit_invalid_label = LABEL();
for (i = 0; i < 8; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4576
4577 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
4578
4579 #if PCRE2_CODE_UNIT_WIDTH == 16
4580
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Out-of-line decoder for a UTF-16 surrogate pair when the subject may be
invalid. TMP1 holds the first code unit (>= 0xd800). The result is returned
in TMP1: the combined code point, or INVALID_UTF_CHAR when the first unit
is not a high surrogate, the subject ends, or the next unit is not a low
surrogate. STR_PTR is undefined for invalid characters.

TMP2 is expected to carry the high surrogate. The arithmetic below only
yields the code point if the 0xd800 bias has already been removed from it,
presumably by the caller - confirm against the call site. */
DEFINE_COMPILER;
struct sljit_jump *first_is_low;
struct sljit_jump *subject_end;
struct sljit_jump *second_not_low;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* A first unit at or above 0xdc00 cannot start a pair. */
first_is_low = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
subject_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* Fetch the second unit and move the high part into position. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The second unit must be in 0xdc00-0xdfff. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
second_not_low = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Every failure ends up here. */
JUMPHERE(first_is_low);
JUMPHERE(subject_end);
JUMPHERE(second_not_low);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4612
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Out-of-line UTF-16 reader specialized for newline checks. TMP1 holds the
first code unit (>= 0xd800). The character is not decoded: when the first
unit is below 0xdc00 and the subject has not ended, TMP1 is set to 0x10000
and STR_PTR is advanced over the next code unit only if that unit is a low
surrogate. Otherwise INVALID_UTF_CHAR is returned in TMP1. TMP2 is used as
scratch. */
DEFINE_COMPILER;
struct sljit_jump *subject_end;
struct sljit_jump *first_is_low;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

subject_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* Fetch the next unit; a first unit at or above 0xdc00 cannot start a
pair. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
first_is_low = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);

/* TMP2 = 1 if the next unit is in 0xdc00-0xdfff, 0 otherwise; scaled to a
byte count it is the amount STR_PTR advances by. */
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Failure exit. */
JUMPHERE(subject_end);
JUMPHERE(first_is_low);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4644
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Step back over the first half of a UTF-16 surrogate pair. On success
STR_PTR is moved back by one code unit and TMP1 is 1; on failure STR_PTR is
moved forward by one code unit and TMP1 is 0.

TMP2 is compared against STR_PTR as the lowest address that may be read
(presumably the start of the subject). TMP1 presumably holds the code unit
already stepped over by the caller with a 0xd800 bias removed, so that
values below 0x400 denote a high surrogate - confirm against the call
site. */
DEFINE_COMPILER;
struct sljit_jump *last_not_low;
struct sljit_jump *at_start;
struct sljit_jump *prev_not_high;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

last_not_low = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
at_start = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* The preceding unit must be in 0xd800-0xdbff. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
prev_not_high = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

/* Success. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Failure. */
JUMPHERE(last_not_low);
JUMPHERE(at_start);
JUMPHERE(prev_not_high);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4672
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Peek at the UTF-16 character that ends just before STR_PTR, with
validation. TMP1 holds the last code unit on entry. Values at or above
0xe000 are returned unchanged. Otherwise the unit must be a low surrogate
preceded by a high surrogate at STR_PTR[-2], and the combined code point is
returned in TMP1. INVALID_UTF_CHAR is returned for every other case.
STR_PTR is not modified.

TMP2 is compared against STR_PTR as the lowest address that may be read
(presumably the start of the subject) and is then reused as scratch. */
DEFINE_COMPILER;
struct sljit_jump *not_surrogate;
struct sljit_jump *last_not_low;
struct sljit_jump *no_room;
struct sljit_jump *prev_not_high;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

not_surrogate = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
/* Two code units must be available before STR_PTR. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
last_not_low = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
no_room = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Combine the pair: 0x10000 + ((high - 0xd800) << 10) + (low - 0xdc00). */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
prev_not_high = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

JUMPHERE(not_surrogate);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Failure exit. */
JUMPHERE(last_not_low);
JUMPHERE(no_room);
JUMPHERE(prev_not_high);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4704
4705 #endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
4706
4707 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
4708 #define UCD_BLOCK_MASK 127
4709 #define UCD_BLOCK_SHIFT 7
4710
static void do_getucd(compiler_common *common)
{
/* Look up the UCD record of the character passed in TMP1 through the
two-stage tables. On return TMP2 holds the value read from ucd_stage2 (the
index of the record) and TMP1 holds the stage-2 table position that was
used; no field of the record itself is loaded here. */
DEFINE_COMPILER;

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* The record of UNASSIGNED_UTF_CHAR must be the dummy_ucd_record. */
const ucd_record *dummy = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(dummy->script == ucp_Unknown && dummy->chartype == ucp_Cn && dummy->gbprop == ucp_gbOther);
SLJIT_ASSERT(dummy->caseset == 0 && dummy->other_case == 0);
#endif

SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Values above MAX_UTF_CODE_POINT are looked up as
  UNASSIGNED_UTF_CHAR. */
  struct sljit_jump *in_range = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(in_range);
  }
#endif

/* Stage 1: one 16-bit entry per block of UCD_BLOCK_SIZE characters. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));

/* Stage 2: position = stage1 value * block size + offset inside the
block; the entries are 16 bits wide as well. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4750
static void do_getucdtype(compiler_common *common)
{
/* Search the UCD record for the character comes in TMP1.
Returns chartype in TMP1 and UCD offset in TMP2. The offset is the byte
offset of the record inside the ucd_records table. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *jump;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Values above MAX_UTF_CODE_POINT are looked up as
  UNASSIGNED_UTF_CHAR. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(jump);
  }
#endif

/* Two-stage table lookup; TMP2 ends up holding the record index. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

/* TMP2 is multiplied by 12, the size of a ucd_record (see the assert
above): (TMP2 << 2) + (TMP2 << 3). */
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

/* Load the chartype field of the selected record. The base address is
loaded once; loading it a second time would only add a redundant
instruction to the generated code. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 0);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4803
4804 #endif /* SUPPORT_UNICODE */
4805
mainloop_entry(compiler_common * common)4806 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
4807 {
4808 DEFINE_COMPILER;
4809 struct sljit_label *mainloop;
4810 struct sljit_label *newlinelabel = NULL;
4811 struct sljit_jump *start;
4812 struct sljit_jump *end = NULL;
4813 struct sljit_jump *end2 = NULL;
4814 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4815 struct sljit_label *loop;
4816 struct sljit_jump *jump;
4817 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
4818 jump_list *newline = NULL;
4819 sljit_u32 overall_options = common->re->overall_options;
4820 BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
4821 BOOL newlinecheck = FALSE;
4822 BOOL readuchar = FALSE;
4823
4824 if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
4825 && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
4826 newlinecheck = TRUE;
4827
4828 SLJIT_ASSERT(common->abort_label == NULL);
4829
4830 if ((overall_options & PCRE2_FIRSTLINE) != 0)
4831 {
4832 /* Search for the end of the first line. */
4833 SLJIT_ASSERT(common->match_end_ptr != 0);
4834 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4835
4836 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4837 {
4838 mainloop = LABEL();
4839 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4840 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4841 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4842 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4843 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
4844 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
4845 JUMPHERE(end);
4846 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4847 }
4848 else
4849 {
4850 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4851 mainloop = LABEL();
4852 /* Continual stores does not cause data dependency. */
4853 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
4854 read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
4855 check_newlinechar(common, common->nltype, &newline, TRUE);
4856 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
4857 JUMPHERE(end);
4858 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
4859 set_jumps(newline, LABEL());
4860 }
4861
4862 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4863 }
4864 else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
4865 {
4866 /* Check whether offset limit is set and valid. */
4867 SLJIT_ASSERT(common->match_end_ptr != 0);
4868
4869 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4870 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
4871 OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
4872 end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
4873 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4874 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
4875 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4876 #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
4877 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4878 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
4879 end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
4880 OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
4881 JUMPHERE(end2);
4882 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
4883 add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
4884 JUMPHERE(end);
4885 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
4886 }
4887
4888 start = JUMP(SLJIT_JUMP);
4889
4890 if (newlinecheck)
4891 {
4892 newlinelabel = LABEL();
4893 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4894 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4895 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4896 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
4897 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
4898 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
4899 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4900 #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
4901 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4902 end2 = JUMP(SLJIT_JUMP);
4903 }
4904
4905 mainloop = LABEL();
4906
4907 /* Increasing the STR_PTR here requires one less jump in the most common case. */
4908 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4909 if (common->utf && !common->invalid_utf) readuchar = TRUE;
4910 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
4911 if (newlinecheck) readuchar = TRUE;
4912
4913 if (readuchar)
4914 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4915
4916 if (newlinecheck)
4917 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
4918
4919 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4920 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4921 #if PCRE2_CODE_UNIT_WIDTH == 8
4922 if (common->invalid_utf)
4923 {
4924 /* Skip continuation code units. */
4925 loop = LABEL();
4926 jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4927 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4928 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4929 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
4930 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
4931 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4932 JUMPHERE(jump);
4933 }
4934 else if (common->utf)
4935 {
4936 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4937 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4938 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4939 JUMPHERE(jump);
4940 }
4941 #elif PCRE2_CODE_UNIT_WIDTH == 16
4942 if (common->invalid_utf)
4943 {
4944 /* Skip continuation code units. */
4945 loop = LABEL();
4946 jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4947 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4948 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4949 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
4950 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
4951 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4952 JUMPHERE(jump);
4953 }
4954 else if (common->utf)
4955 {
4956 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4957
4958 if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
4959 {
4960 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4961 OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
4962 CMOV(SLJIT_LESS, STR_PTR, TMP2, 0);
4963 }
4964 else
4965 {
4966 OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
4967 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
4968 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4969 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4970 }
4971 }
4972 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
4973 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
4974 JUMPHERE(start);
4975
4976 if (newlinecheck)
4977 {
4978 JUMPHERE(end);
4979 JUMPHERE(end2);
4980 }
4981
4982 return mainloop;
4983 }
4984
4985
add_prefix_char(PCRE2_UCHAR chr,fast_forward_char_data * chars,BOOL last)4986 static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
4987 {
4988 sljit_u32 i, count = chars->count;
4989
4990 if (count == 255)
4991 return;
4992
4993 if (count == 0)
4994 {
4995 chars->count = 1;
4996 chars->chars[0] = chr;
4997
4998 if (last)
4999 chars->last_count = 1;
5000 return;
5001 }
5002
5003 for (i = 0; i < count; i++)
5004 if (chars->chars[i] == chr)
5005 return;
5006
5007 if (count >= MAX_DIFF_CHARS)
5008 {
5009 chars->count = 255;
5010 return;
5011 }
5012
5013 chars->chars[count] = chr;
5014 chars->count = count + 1;
5015
5016 if (last)
5017 chars->last_count++;
5018 }
5019
/* Scans the compiled pattern starting at cc and records, for each leading
character position, which code units may appear there.

Arguments:
  common      compiler state; this function reads common->utf,
              common->ctypes and common->fcc
  cc          first opcode to examine
  chars       per-position records; chars[0] is the first position to fill
              and the pointer is advanced once per consumed position
  max_chars   number of positions that may still be filled
              (NOTE(review): the "--max_chars == 0" tests assume a positive
              value on entry - confirm against the callers)
  rec_count   work budget shared by all recursion levels; it is decremented
              once per iteration of the main loop

Returns:      the number of positions consumed by this invocation, or 0 as
              soon as the rec_count budget is exhausted

Scanning stops at the first opcode that is not handled by the switch below.
Alternatives and optional items are handled by recursion into the SAME
positions; the value returned by the recursive call replaces max_chars, so
positions filled afterwards never extend past what the recursive scan
covered. */
static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
{
/* Recursive function, which scans prefix literals. */
BOOL last, any, class, caseless;
int len, repeat, len_save, consumed = 0;
sljit_u32 chr; /* Any unicode character. */
sljit_u8 *bytes, *bytes_end, byte;
PCRE2_SPTR alternative, cc_save, oc;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
PCRE2_UCHAR othercase[4];
#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
PCRE2_UCHAR othercase[2];
#else
PCRE2_UCHAR othercase[1];
#endif

/* How many times the next item has to be recorded. It is reset to 1 after
every item and only changed by the exact/range repeat opcodes. */
repeat = 1;
while (TRUE)
  {
  /* Give up (reporting nothing) once the shared budget is used up. */
  if (*rec_count == 0)
    return 0;
  (*rec_count)--;

  /* Per-item flags:
       last      stop scanning after this item has been recorded
       any       the item is recorded as "saturated" (count = 255)
       class     the item is a 32-byte bitmap class
       caseless  the literal also matches its other case */
  last = TRUE;
  any = FALSE;
  class = FALSE;
  caseless = FALSE;

  switch (*cc)
    {
    case OP_CHARI:
    caseless = TRUE;
    /* Fall through */
    case OP_CHAR:
    /* A single literal: scanning may continue after it. */
    last = FALSE;
    cc++;
    break;

    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    /* Zero width assertions. */
    cc++;
    continue;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    /* Assertions consume nothing: skip the whole bracket. */
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    caseless = TRUE;
    /* Fall through */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    /* One occurrence is recorded; 'last' stays TRUE so the scan ends
    after it. */
    cc++;
    break;

    case OP_EXACTI:
    caseless = TRUE;
    /* Fall through */
    case OP_EXACT:
    /* Literal repeated a fixed number of times. */
    repeat = GET2(cc, 1);
    last = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    caseless = TRUE;
    /* Fall through */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    /* Optional literal: first record what follows it (cc + len skips the
    literal) into the same positions, then fall out of the switch so the
    literal itself is merged into those positions as well. */
    len = 1;
    cc++;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
    max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
    if (max_chars == 0)
      return consumed;
    last = FALSE;
    break;

    case OP_KET:
    /* End of a bracket: step over it. */
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    /* Advance by the link value stored after the opcode. */
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* Every OP_ALT branch of the bracket is scanned recursively into the
    same positions; the first branch is then scanned inline by the main
    loop. */
    alternative = cc + GET(cc, 1);
    while (*alternative == OP_ALT)
      {
      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      alternative += GET(alternative, 1);
      }

    /* Capturing brackets carry an extra IMM2_SIZE operand. */
    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    /* NOTE(review): is_char7_bitset presumably accepts only bitmaps without
    characters >= 128 - confirm against its definition. */
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
      return consumed;
#endif
    class = TRUE;
    break;

    case OP_NCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    class = TRUE;
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    /* Extended classes are not enumerated; the position is saturated. */
    any = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    /* Extra increment for the operand code unit that follows the opcode. */
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UNICODE
    case OP_NOTPROP:
    case OP_PROP:
#if PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* Only remember the count; the character type opcode that follows is
    handled by the next iteration. */
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    /* Anything else ends the prefix. */
    return consumed;
    }

  if (any)
    {
    /* Mark 'repeat' consecutive positions as saturated. */
    do
      {
      chars->count = 255;

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    repeat = 1;
    continue;
    }

  if (class)
    {
    /* The 32-byte bitmap follows the opcode; cc is moved to whatever comes
    after the bitmap (possibly a repeat opcode). */
    bytes = (sljit_u8*) (cc + 1);
    cc += 1 + 32 / sizeof(PCRE2_UCHAR);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      /* The class may match zero times: what follows the repeat opcode can
      start at this position too, so record it first. */
      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      break;

      default:
      case OP_CRPLUS:
      case OP_CRMINPLUS:
      case OP_CRPOSPLUS:
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* Use the first count of the range as the number of repetitions. */
      repeat = GET2(cc, 1);
      if (repeat <= 0)
        return consumed;
      break;
      }

    do
      {
      /* If the top bit of the last bitmap byte (character 255) is set the
      position is saturated without enumerating the bitmap. */
      if (bytes[31] & 0x80)
        chars->count = 255;
      else if (chars->count != 255)
        {
        /* Enumerate the set bits; chr is the character value of the bit
        currently examined. */
        bytes_end = bytes + 32;
        chr = 0;
        do
          {
          byte = *bytes++;
          SLJIT_ASSERT((chr & 0x7) == 0);
          if (byte == 0)
            chr += 8;
          else
            {
            do
              {
              if ((byte & 0x1) != 0)
                add_prefix_char(chr, chars, TRUE);
              byte >>= 1;
              chr++;
              }
            while (byte != 0);
            /* The inner loop stops at the highest set bit: round chr up to
            the first character of the next bitmap byte. */
            chr = (chr + 7) & ~7;
            }
          }
        while (chars->count != 255 && bytes < bytes_end);
        /* Rewind to the start of the bitmap for the next repetition. */
        bytes = bytes_end - 32;
        }

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      return consumed;

      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      cc++;
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* Scanning can only continue when both counts of the range are equal,
      i.e. the repetition count is fixed. */
      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
        return consumed;
      cc += 1 + 2 * IMM2_SIZE;
      break;
      }

    repeat = 1;
    continue;
    }

  /* Literal character: cc points to its first code unit and len is the
  number of code units it occupies. */
  len = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif

  if (caseless && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UNICODE
    if (common->utf)
      {
      /* The other case is only usable when it encodes to the same number of
      code units as the character itself. */
      GETCHAR(chr, cc);
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
        return consumed;
      }
    else
#endif
      {
      chr = *cc;
      othercase[0] = TABLE_GET(chr, common->fcc, chr);
      }
    }
  else
    {
    caseless = FALSE;
    othercase[0] = 0; /* Stops compiler warning - PH */
    }

  /* Record the code units of the literal (and of its other case) 'repeat'
  times; cc and len are restored before every further repetition. */
  len_save = len;
  cc_save = cc;
  while (TRUE)
    {
    oc = othercase;
    do
      {
      len--;
      consumed++;

      /* NOTE(review): chr is assigned here but not read again in this
      loop. */
      chr = *cc;
      /* 'last' argument: TRUE only for the final code unit of the
      character. */
      add_prefix_char(*cc, chars, len == 0);

      if (caseless)
        add_prefix_char(*oc, chars, len == 0);

      if (--max_chars == 0)
        return consumed;
      chars++;
      cc++;
      oc++;
      }
    while (len > 0);

    if (--repeat == 0)
      break;

    len = len_save;
    cc = cc_save;
    }

  repeat = 1;
  if (last)
    return consumed;
  }
}
5421
5422 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
jumpto_if_not_utf_char_start(struct sljit_compiler * compiler,sljit_s32 reg,struct sljit_label * label)5423 static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
5424 {
5425 #if PCRE2_CODE_UNIT_WIDTH == 8
5426 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
5427 CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0x80, label);
5428 #elif PCRE2_CODE_UNIT_WIDTH == 16
5429 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
5430 CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label);
5431 #else
5432 #error "Unknown code width"
5433 #endif
5434 }
5435 #endif
5436
5437 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
5438
5439 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
jump_if_utf_char_start(struct sljit_compiler * compiler,sljit_s32 reg)5440 static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg)
5441 {
5442 #if PCRE2_CODE_UNIT_WIDTH == 8
5443 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
5444 return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0x80);
5445 #elif PCRE2_CODE_UNIT_WIDTH == 16
5446 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
5447 return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00);
5448 #else
5449 #error "Unknown code width"
5450 #endif
5451 }
5452 #endif
5453
character_to_int32(PCRE2_UCHAR chr)5454 static sljit_s32 character_to_int32(PCRE2_UCHAR chr)
5455 {
5456 sljit_u32 value = chr;
5457 #if PCRE2_CODE_UNIT_WIDTH == 8
5458 #define SSE2_COMPARE_TYPE_INDEX 0
5459 return (sljit_s32)((value << 24) | (value << 16) | (value << 8) | value);
5460 #elif PCRE2_CODE_UNIT_WIDTH == 16
5461 #define SSE2_COMPARE_TYPE_INDEX 1
5462 return (sljit_s32)((value << 16) | value);
5463 #elif PCRE2_CODE_UNIT_WIDTH == 32
5464 #define SSE2_COMPARE_TYPE_INDEX 2
5465 return (sljit_s32)(value);
5466 #else
5467 #error "Unsupported unit width"
5468 #endif
5469 }
5470
/* Emits "MOVDQA xmm, [reg]": a 16-byte load from the address held in a
general purpose register into an XMM register.

Arguments:
  compiler         sljit compiler the instruction bytes are appended to
  dst_xmm_reg      hardware index of the destination XMM register (0..7)
  src_general_reg  hardware index of the general register holding the address

The ModRM byte is built with mod = 00 (register-indirect, no displacement).
On x86-64, registers with index 8 and above need a REX.B prefix (0x41) placed
between the 0x66 operand-size prefix and the opcode, which makes the
instruction five bytes long instead of four.

NOTE(review): with mod = 00 a base register whose low three bits are 4 or 5
requires a SIB byte / encodes a displacement-only form; this function does
not handle those - confirm callers never pass such registers. */
static void load_from_mem_sse2(struct sljit_compiler *compiler, sljit_s32 dst_xmm_reg, sljit_s32 src_general_reg)
{
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
sljit_u8 instruction[5];
#else
sljit_u8 instruction[4];
#endif

SLJIT_ASSERT(dst_xmm_reg < 8);

/* MOVDQA xmm1, xmm2/m128 */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if (src_general_reg < 8)
  {
  instruction[0] = 0x66;
  instruction[1] = 0x0f;
  instruction[2] = 0x6f;
  instruction[3] = (dst_xmm_reg << 3) | src_general_reg;
  sljit_emit_op_custom(compiler, instruction, 4);
  }
else
  {
  /* 66 REX.B 0F 6F /r */
  instruction[0] = 0x66;
  instruction[1] = 0x41;
  instruction[2] = 0x0f;
  instruction[3] = 0x6f;
  instruction[4] = (dst_xmm_reg << 3) | (src_general_reg & 0x7);
  /* All five bytes must be emitted; emitting four would drop the ModRM
  byte and corrupt the generated instruction stream. */
  sljit_emit_op_custom(compiler, instruction, 5);
  }
#else
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = 0x6f;
instruction[3] = (dst_xmm_reg << 3) | src_general_reg;
sljit_emit_op_custom(compiler, instruction, 4);
#endif
}
5508
/* Emits the SSE2 instructions that turn the data in XMM register dst_ind
into a per-lane match mask for "lane == char1 or lane == char2".

Arguments:
  compiler   sljit compiler the instruction bytes are appended to
  char1      first accepted code unit
  char2      second accepted code unit
  bit        non-zero when the one-compare shortcut below is to be used
  dst_ind    XMM index holding the loaded data; receives the mask
  cmp1_ind   XMM index of the first comparison pattern
  cmp2_ind   XMM index of the second pattern (or of the 'bit' pattern)
  tmp_ind    scratch XMM index, only written in the two-compare case

Three instruction sequences are possible:
  char1 == char2           PCMPEQ dst, cmp1
  bit != 0                 POR dst, cmp2 ; PCMPEQ dst, cmp1
  otherwise                MOVDQA tmp, dst ; PCMPEQ dst, cmp1 ;
                           PCMPEQ tmp, cmp2 ; POR dst, tmp

All instructions are encoded as 66 0F <opcode> <ModRM> with a
register-direct ModRM (0xc0 | reg << 3 | rm). */
static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, PCRE2_UCHAR char1, PCRE2_UCHAR char2,
  sljit_u32 bit, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
{
sljit_u8 insn[4];

/* Prefix bytes shared by every instruction emitted here. */
insn[0] = 0x66;
insn[1] = 0x0f;

if (char1 != char2 && bit == 0)
  {
  /* Two independent compares, merged with OR. */

  /* MOVDQA tmp, dst */
  insn[2] = 0x6f;
  insn[3] = 0xc0 | (tmp_ind << 3) | dst_ind;
  sljit_emit_op_custom(compiler, insn, 4);

  /* PCMPEQB/W/D dst, cmp1 */
  insn[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
  insn[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
  sljit_emit_op_custom(compiler, insn, 4);

  /* PCMPEQB/W/D tmp, cmp2 (same opcode byte as above) */
  insn[3] = 0xc0 | (tmp_ind << 3) | cmp2_ind;
  sljit_emit_op_custom(compiler, insn, 4);

  /* POR dst, tmp */
  insn[2] = 0xeb;
  insn[3] = 0xc0 | (dst_ind << 3) | tmp_ind;
  sljit_emit_op_custom(compiler, insn, 4);
  return;
  }

if (bit != 0)
  {
  /* POR dst, cmp2 */
  insn[2] = 0xeb;
  insn[3] = 0xc0 | (dst_ind << 3) | cmp2_ind;
  sljit_emit_op_custom(compiler, insn, 4);
  }

/* PCMPEQB/W/D dst, cmp1 */
insn[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
insn[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
sljit_emit_op_custom(compiler, insn, 4);
}
5562
/* Emits an SSE2 search loop that advances STR_PTR to the next code unit that
is equal to char1 or char2.

Arguments:
  common   compiler state (this function reads mode and utf, and appends to
           the failed_match jump list)
  char1    first accepted code unit
  char2    second accepted code unit (may be equal to char1)
  offset   only used in UTF mode with 8/16-bit code units: when positive,
           the code unit at STR_PTR - offset is checked after a hit and, if
           it is a trailing unit, the search is restarted one code unit
           later

The emitted code overwrites TMP1 and TMP2 and uses XMM0..XMM3 (see the
*_ind locals). The input is read in aligned 16-byte blocks.

When the end of the subject is reached: in PCRE2_JIT_COMPLETE mode the
emitted code jumps to common->failed_match; in the other modes STR_PTR is
clamped to STR_END and execution continues after the emitted code. */
static void fast_forward_first_char2_sse2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *start;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *quit;
struct sljit_jump *partial_quit[2];
sljit_u8 instruction[8];
sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
/* XMM register assignment. */
sljit_s32 data_ind = 0;
sljit_s32 tmp_ind = 1;
sljit_s32 cmp1_ind = 2;
sljit_s32 cmp2_ind = 3;
sljit_u32 bit = 0;

/* offset is only referenced inside the UTF-only section at the end. */
SLJIT_UNUSED_ARG(offset);

/* If is_powerof2 accepts the XOR of the two characters, 'bit' holds it and a
single compare is used: that bit is forced on in the data (POR) and the
result is compared with char1 | bit. Otherwise bit stays 0. */
if (char1 != char2)
  {
  bit = char1 ^ char2;
  if (!is_powerof2(bit))
    bit = 0;
  }

/* Nothing to search when already at the end of the subject. In complete
mode this is a failed match; otherwise the jump is bound near the end of
this function. */
partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit[0]);

/* First part (unaligned start) */

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));

/* The hand-encoded instructions below have no REX prefix. */
SLJIT_ASSERT(tmp1_ind < 8);

/* MOVD xmm, r/m32 */
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = 0x6e;
instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 4);

if (char1 != char2)
  {
  /* Second pattern: the differing bit, or char2 itself. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));

  /* MOVD xmm, r/m32 */
  instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_ind;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

/* TMP2 keeps the unaligned start address. */
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);

/* Broadcast the low dword to all four dwords (imm8 = 0). The source operand
is written as the literal 2, which is cmp1_ind. */
/* PSHUFD xmm1, xmm2/m128, imm8 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x70;
instruction[3] = 0xc0 | (cmp1_ind << 3) | 2;
instruction[4] = 0;
sljit_emit_op_custom(compiler, instruction, 5);

if (char1 != char2)
  {
  /* Same broadcast for the second pattern; the literal 3 is cmp2_ind. */
  /* PSHUFD xmm1, xmm2/m128, imm8 */
  instruction[3] = 0xc0 | (cmp2_ind << 3) | 3;
  sljit_emit_op_custom(compiler, instruction, 5);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
/* Re-entry point used by the UTF check at the end; TMP2 must hold the
address to resume from. */
restart = LABEL();
#endif
/* Round STR_PTR down to a 16-byte boundary and keep the distance from that
boundary in TMP2. */
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);

load_from_mem_sse2(compiler, data_ind, str_ptr_ind);
fast_forward_char_pair_sse2_compare(compiler, char1, char2, bit, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

/* Collect one mask bit per byte of XMM0 into TMP1. */
/* PMOVMSKB reg, xmm */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
sljit_emit_op_custom(compiler, instruction, 4);

/* Restore the unaligned STR_PTR and shift out the mask bits that belong to
bytes in front of it. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Index of the lowest set mask bit; the zero flag is set when the mask is
empty. sljit is told that the zero flag is now valid. */
/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 3);
sljit_set_current_flags(compiler, SLJIT_SET_Z);

/* Hit in the first block: TMP1 is the byte distance from STR_PTR. */
quit = JUMP(SLJIT_NOT_ZERO);

/* No hit: go back to the aligned address for the main loop. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

start = LABEL();
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);

partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit[1]);

/* Second part (aligned) */

/* The literal 0 is data_ind. */
load_from_mem_sse2(compiler, 0, str_ptr_ind);
fast_forward_char_pair_sse2_compare(compiler, char1, char2, bit, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

/* PMOVMSKB reg, xmm */
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
sljit_emit_op_custom(compiler, instruction, 4);

/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 3);
sljit_set_current_flags(compiler, SLJIT_SET_Z);

/* Empty mask: try the next 16-byte block. */
JUMPTO(SLJIT_ZERO, start);

/* A hit was found: step to the matching byte. */
JUMPHERE(quit);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

if (common->mode != PCRE2_JIT_COMPLETE)
  {
  /* Both end-of-subject exits land here; STR_PTR is clamped so that it
  never exceeds STR_END. */
  JUMPHERE(partial_quit[0]);
  JUMPHERE(partial_quit[1]);
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
  CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
  }
else
  /* The last block is loaded whole, so a hit may lie at or after STR_END. */
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);

  /* Check the code unit 'offset' units before the hit. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));

  /* Done when that unit starts a character. */
  quit = jump_if_utf_char_start(compiler, TMP1);

  /* Otherwise skip one code unit and search again from there. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, restart);

  JUMPHERE(quit);
  }
#endif
}
5722
5723 #ifndef _WIN64
5724
max_fast_forward_char_pair_sse2_offset(void)5725 static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_sse2_offset(void)
5726 {
5727 #if PCRE2_CODE_UNIT_WIDTH == 8
5728 return 15;
5729 #elif PCRE2_CODE_UNIT_WIDTH == 16
5730 return 7;
5731 #elif PCRE2_CODE_UNIT_WIDTH == 32
5732 return 3;
5733 #else
5734 #error "Unsupported unit width"
5735 #endif
5736 }
5737
fast_forward_char_pair_sse2(compiler_common * common,sljit_s32 offs1,PCRE2_UCHAR char1a,PCRE2_UCHAR char1b,sljit_s32 offs2,PCRE2_UCHAR char2a,PCRE2_UCHAR char2b)5738 static void fast_forward_char_pair_sse2(compiler_common *common, sljit_s32 offs1,
5739 PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
5740 {
5741 DEFINE_COMPILER;
5742 sljit_u32 bit1 = 0;
5743 sljit_u32 bit2 = 0;
5744 sljit_u32 diff = IN_UCHARS(offs1 - offs2);
5745 sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
5746 sljit_s32 tmp2_ind = sljit_get_register_index(TMP2);
5747 sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
5748 sljit_s32 data1_ind = 0;
5749 sljit_s32 data2_ind = 1;
5750 sljit_s32 tmp_ind = 2;
5751 sljit_s32 cmp1a_ind = 3;
5752 sljit_s32 cmp1b_ind = 4;
5753 sljit_s32 cmp2a_ind = 5;
5754 sljit_s32 cmp2b_ind = 6;
5755 struct sljit_label *start;
5756 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5757 struct sljit_label *restart;
5758 #endif
5759 struct sljit_jump *jump[2];
5760
5761 sljit_u8 instruction[8];
5762
5763 SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
5764 SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_sse2_offset()));
5765 SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);
5766
5767 /* Initialize. */
5768 if (common->match_end_ptr != 0)
5769 {
5770 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
5771 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
5772 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
5773
5774 OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
5775 CMOV(SLJIT_LESS, STR_END, TMP1, 0);
5776 }
5777
5778 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
5779 add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5780
5781 /* MOVD xmm, r/m32 */
5782 instruction[0] = 0x66;
5783 instruction[1] = 0x0f;
5784 instruction[2] = 0x6e;
5785
5786 if (char1a == char1b)
5787 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
5788 else
5789 {
5790 bit1 = char1a ^ char1b;
5791 if (is_powerof2(bit1))
5792 {
5793 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1));
5794 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1));
5795 }
5796 else
5797 {
5798 bit1 = 0;
5799 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
5800 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b));
5801 }
5802 }
5803
5804 instruction[3] = 0xc0 | (cmp1a_ind << 3) | tmp1_ind;
5805 sljit_emit_op_custom(compiler, instruction, 4);
5806
5807 if (char1a != char1b)
5808 {
5809 instruction[3] = 0xc0 | (cmp1b_ind << 3) | tmp2_ind;
5810 sljit_emit_op_custom(compiler, instruction, 4);
5811 }
5812
5813 if (char2a == char2b)
5814 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
5815 else
5816 {
5817 bit2 = char2a ^ char2b;
5818 if (is_powerof2(bit2))
5819 {
5820 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2));
5821 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2));
5822 }
5823 else
5824 {
5825 bit2 = 0;
5826 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
5827 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b));
5828 }
5829 }
5830
5831 instruction[3] = 0xc0 | (cmp2a_ind << 3) | tmp1_ind;
5832 sljit_emit_op_custom(compiler, instruction, 4);
5833
5834 if (char2a != char2b)
5835 {
5836 instruction[3] = 0xc0 | (cmp2b_ind << 3) | tmp2_ind;
5837 sljit_emit_op_custom(compiler, instruction, 4);
5838 }
5839
5840 /* PSHUFD xmm1, xmm2/m128, imm8 */
5841 /* instruction[0] = 0x66; */
5842 /* instruction[1] = 0x0f; */
5843 instruction[2] = 0x70;
5844 instruction[4] = 0;
5845
5846 instruction[3] = 0xc0 | (cmp1a_ind << 3) | cmp1a_ind;
5847 sljit_emit_op_custom(compiler, instruction, 5);
5848
5849 if (char1a != char1b)
5850 {
5851 instruction[3] = 0xc0 | (cmp1b_ind << 3) | cmp1b_ind;
5852 sljit_emit_op_custom(compiler, instruction, 5);
5853 }
5854
5855 instruction[3] = 0xc0 | (cmp2a_ind << 3) | cmp2a_ind;
5856 sljit_emit_op_custom(compiler, instruction, 5);
5857
5858 if (char2a != char2b)
5859 {
5860 instruction[3] = 0xc0 | (cmp2b_ind << 3) | cmp2b_ind;
5861 sljit_emit_op_custom(compiler, instruction, 5);
5862 }
5863
5864 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5865 restart = LABEL();
5866 #endif
5867
5868 OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1 - offs2));
5869 OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
5870 OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
5871 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, ~0xf);
5872
5873 load_from_mem_sse2(compiler, data1_ind, str_ptr_ind);
5874
5875 jump[0] = CMP(SLJIT_EQUAL, STR_PTR, 0, TMP1, 0);
5876
5877 load_from_mem_sse2(compiler, data2_ind, tmp1_ind);
5878
5879 /* MOVDQA xmm1, xmm2/m128 */
5880 /* instruction[0] = 0x66; */
5881 /* instruction[1] = 0x0f; */
5882 instruction[2] = 0x6f;
5883 instruction[3] = 0xc0 | (tmp_ind << 3) | data1_ind;
5884 sljit_emit_op_custom(compiler, instruction, 4);
5885
5886 /* PSLLDQ xmm1, xmm2/m128, imm8 */
5887 /* instruction[0] = 0x66; */
5888 /* instruction[1] = 0x0f; */
5889 instruction[2] = 0x73;
5890 instruction[3] = 0xc0 | (7 << 3) | tmp_ind;
5891 instruction[4] = diff;
5892 sljit_emit_op_custom(compiler, instruction, 5);
5893
5894 /* PSRLDQ xmm1, xmm2/m128, imm8 */
5895 /* instruction[0] = 0x66; */
5896 /* instruction[1] = 0x0f; */
5897 /* instruction[2] = 0x73; */
5898 instruction[3] = 0xc0 | (3 << 3) | data2_ind;
5899 instruction[4] = 16 - diff;
5900 sljit_emit_op_custom(compiler, instruction, 5);
5901
5902 /* POR xmm1, xmm2/m128 */
5903 /* instruction[0] = 0x66; */
5904 /* instruction[1] = 0x0f; */
5905 instruction[2] = 0xeb;
5906 instruction[3] = 0xc0 | (data2_ind << 3) | tmp_ind;
5907 sljit_emit_op_custom(compiler, instruction, 4);
5908
5909 jump[1] = JUMP(SLJIT_JUMP);
5910
5911 JUMPHERE(jump[0]);
5912
5913 /* MOVDQA xmm1, xmm2/m128 */
5914 /* instruction[0] = 0x66; */
5915 /* instruction[1] = 0x0f; */
5916 instruction[2] = 0x6f;
5917 instruction[3] = 0xc0 | (data2_ind << 3) | data1_ind;
5918 sljit_emit_op_custom(compiler, instruction, 4);
5919
5920 /* PSLLDQ xmm1, xmm2/m128, imm8 */
5921 /* instruction[0] = 0x66; */
5922 /* instruction[1] = 0x0f; */
5923 instruction[2] = 0x73;
5924 instruction[3] = 0xc0 | (7 << 3) | data2_ind;
5925 instruction[4] = diff;
5926 sljit_emit_op_custom(compiler, instruction, 5);
5927
5928 JUMPHERE(jump[1]);
5929
5930 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
5931
5932 fast_forward_char_pair_sse2_compare(compiler, char2a, char2b, bit2, data2_ind, cmp2a_ind, cmp2b_ind, tmp_ind);
5933 fast_forward_char_pair_sse2_compare(compiler, char1a, char1b, bit1, data1_ind, cmp1a_ind, cmp1b_ind, tmp_ind);
5934
5935 /* PAND xmm1, xmm2/m128 */
5936 /* instruction[0] = 0x66; */
5937 /* instruction[1] = 0x0f; */
5938 instruction[2] = 0xdb;
5939 instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind;
5940 sljit_emit_op_custom(compiler, instruction, 4);
5941
5942 /* PMOVMSKB reg, xmm */
5943 /* instruction[0] = 0x66; */
5944 /* instruction[1] = 0x0f; */
5945 instruction[2] = 0xd7;
5946 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
5947 sljit_emit_op_custom(compiler, instruction, 4);
5948
5949 /* Ignore matches before the first STR_PTR. */
5950 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5951 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
5952
5953 /* BSF r32, r/m32 */
5954 instruction[0] = 0x0f;
5955 instruction[1] = 0xbc;
5956 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
5957 sljit_emit_op_custom(compiler, instruction, 3);
5958 sljit_set_current_flags(compiler, SLJIT_SET_Z);
5959
5960 jump[0] = JUMP(SLJIT_NOT_ZERO);
5961
5962 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5963
5964 /* Main loop. */
5965 instruction[0] = 0x66;
5966 instruction[1] = 0x0f;
5967
5968 start = LABEL();
5969
5970 load_from_mem_sse2(compiler, data2_ind, str_ptr_ind);
5971
5972 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
5973 add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5974
5975 load_from_mem_sse2(compiler, data1_ind, str_ptr_ind);
5976
5977 /* PSRLDQ xmm1, xmm2/m128, imm8 */
5978 /* instruction[0] = 0x66; */
5979 /* instruction[1] = 0x0f; */
5980 instruction[2] = 0x73;
5981 instruction[3] = 0xc0 | (3 << 3) | data2_ind;
5982 instruction[4] = 16 - diff;
5983 sljit_emit_op_custom(compiler, instruction, 5);
5984
5985 /* MOVDQA xmm1, xmm2/m128 */
5986 /* instruction[0] = 0x66; */
5987 /* instruction[1] = 0x0f; */
5988 instruction[2] = 0x6f;
5989 instruction[3] = 0xc0 | (tmp_ind << 3) | data1_ind;
5990 sljit_emit_op_custom(compiler, instruction, 4);
5991
5992 /* PSLLDQ xmm1, xmm2/m128, imm8 */
5993 /* instruction[0] = 0x66; */
5994 /* instruction[1] = 0x0f; */
5995 instruction[2] = 0x73;
5996 instruction[3] = 0xc0 | (7 << 3) | tmp_ind;
5997 instruction[4] = diff;
5998 sljit_emit_op_custom(compiler, instruction, 5);
5999
6000 /* POR xmm1, xmm2/m128 */
6001 /* instruction[0] = 0x66; */
6002 /* instruction[1] = 0x0f; */
6003 instruction[2] = 0xeb;
6004 instruction[3] = 0xc0 | (data2_ind << 3) | tmp_ind;
6005 sljit_emit_op_custom(compiler, instruction, 4);
6006
6007 fast_forward_char_pair_sse2_compare(compiler, char1a, char1b, bit1, data1_ind, cmp1a_ind, cmp1b_ind, tmp_ind);
6008 fast_forward_char_pair_sse2_compare(compiler, char2a, char2b, bit2, data2_ind, cmp2a_ind, cmp2b_ind, tmp_ind);
6009
6010 /* PAND xmm1, xmm2/m128 */
6011 /* instruction[0] = 0x66; */
6012 /* instruction[1] = 0x0f; */
6013 instruction[2] = 0xdb;
6014 instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind;
6015 sljit_emit_op_custom(compiler, instruction, 4);
6016
6017 /* PMOVMSKB reg, xmm */
6018 /* instruction[0] = 0x66; */
6019 /* instruction[1] = 0x0f; */
6020 instruction[2] = 0xd7;
6021 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
6022 sljit_emit_op_custom(compiler, instruction, 4);
6023
6024 /* BSF r32, r/m32 */
6025 instruction[0] = 0x0f;
6026 instruction[1] = 0xbc;
6027 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
6028 sljit_emit_op_custom(compiler, instruction, 3);
6029 sljit_set_current_flags(compiler, SLJIT_SET_Z);
6030
6031 JUMPTO(SLJIT_ZERO, start);
6032
6033 JUMPHERE(jump[0]);
6034
6035 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6036
6037 add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6038
6039 if (common->match_end_ptr != 0)
6040 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6041
6042 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6043 if (common->utf)
6044 {
6045 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));
6046
6047 jump[0] = jump_if_utf_char_start(compiler, TMP1);
6048
6049 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6050 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart);
6051
6052 add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP));
6053
6054 JUMPHERE(jump[0]);
6055 }
6056 #endif
6057
6058 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
6059
6060 if (common->match_end_ptr != 0)
6061 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6062 }
6063
check_fast_forward_char_pair_sse2(compiler_common * common,fast_forward_char_data * chars,int max)6064 static BOOL check_fast_forward_char_pair_sse2(compiler_common *common, fast_forward_char_data *chars, int max)
6065 {
6066 sljit_s32 i, j, priority, count;
6067 sljit_u32 priorities;
6068 PCRE2_UCHAR a1, a2, b1, b2;
6069
6070 priorities = 0;
6071
6072 count = 0;
6073 for (i = 0; i < max; i++)
6074 {
6075 if (chars[i].last_count > 2)
6076 {
6077 SLJIT_ASSERT(chars[i].last_count <= 7);
6078
6079 priorities |= (1 << chars[i].last_count);
6080 count++;
6081 }
6082 }
6083
6084 if (count < 2)
6085 return FALSE;
6086
6087 for (priority = 7; priority > 2; priority--)
6088 {
6089 if ((priorities & (1 << priority)) == 0)
6090 continue;
6091
6092 for (i = max - 1; i >= 1; i--)
6093 if (chars[i].last_count >= priority)
6094 {
6095 SLJIT_ASSERT(chars[i].count <= 2 && chars[i].count >= 1);
6096
6097 a1 = chars[i].chars[0];
6098 a2 = chars[i].chars[1];
6099
6100 j = i - max_fast_forward_char_pair_sse2_offset();
6101 if (j < 0)
6102 j = 0;
6103
6104 while (j < i)
6105 {
6106 if (chars[j].last_count >= priority)
6107 {
6108 b1 = chars[j].chars[0];
6109 b2 = chars[j].chars[1];
6110
6111 if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)
6112 {
6113 fast_forward_char_pair_sse2(common, i, a1, a2, j, b1, b2);
6114 return TRUE;
6115 }
6116 }
6117 j++;
6118 }
6119 }
6120 }
6121
6122 return FALSE;
6123 }
6124
6125 #endif
6126
6127 #undef SSE2_COMPARE_TYPE_INDEX
6128
6129 #endif
6130
/* Emits a search loop that advances STR_PTR to the first position where the
code unit found 'offset' units further on equals char1 or char2. Passing the
same value twice searches for a single code unit.

When common->match_end_ptr is in use, STR_END is temporarily clamped to the
value stored in that stack slot plus (offset + 1) code units; the real end is
kept in TMP3 and put back before the generated code continues. TMP1 is used as
scratch.

In PCRE2_JIT_COMPLETE mode running out of subject jumps to
common->failed_match. In the other modes 'offset' must be 0 and the search
just stops once STR_PTR reaches STR_END. */
static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *scan;
struct sljit_jump *hit_char1;
struct sljit_jump *out_of_input;
PCRE2_UCHAR diff;
BOOL clamp_end = (common->match_end_ptr != 0);
BOOL complete = (common->mode == PCRE2_JIT_COMPLETE);

SLJIT_ASSERT(complete || offset == 0);

if (clamp_end)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (clamp_end)
  {
  /* Save the real end in TMP3, then STR_END = min(STR_END, TMP1 + offset + 1). */
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }

#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)

/* Prefer the SSE2 search whenever the CPU offers it. */

if (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
  {
  fast_forward_first_char2_sse2(common, char1, char2, offset);

  /* Undo the initial displacement of STR_PTR. */
  if (offset > 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

  if (clamp_end)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

/* Generic loop: read one code unit per iteration. */
scan = LABEL();

out_of_input = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (complete)
  add_jump(compiler, &common->failed_match, out_of_input);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

diff = char1 ^ char2;
if (char1 == char2)
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, scan);
else if (is_powerof2(diff))
  {
  /* The two values differ in one bit: force that bit and compare once. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, diff);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | diff, scan);
  }
else
  {
  hit_char1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, scan);
  JUMPHERE(hit_char1);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  /* The candidate start (offset + 1 units back) must begin a character. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
  jumpto_if_not_utf_char_start(compiler, TMP1, scan);
  }
#endif

/* Step back over the displacement and the code unit just consumed. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));

if (!complete)
  JUMPHERE(out_of_input);

if (clamp_end)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
6218
fast_forward_first_n_chars(compiler_common * common)6219 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
6220 {
6221 DEFINE_COMPILER;
6222 struct sljit_label *start;
6223 struct sljit_jump *match;
6224 fast_forward_char_data chars[MAX_N_CHARS];
6225 sljit_s32 offset;
6226 PCRE2_UCHAR mask;
6227 PCRE2_UCHAR *char_set, *char_set_end;
6228 int i, max, from;
6229 int range_right = -1, range_len;
6230 sljit_u8 *update_table = NULL;
6231 BOOL in_range;
6232 sljit_u32 rec_count;
6233
6234 for (i = 0; i < MAX_N_CHARS; i++)
6235 {
6236 chars[i].count = 0;
6237 chars[i].last_count = 0;
6238 }
6239
6240 rec_count = 10000;
6241 max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
6242
6243 if (max < 1)
6244 return FALSE;
6245
6246 /* Convert last_count to priority. */
6247 for (i = 0; i < max; i++)
6248 {
6249 SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count);
6250
6251 if (chars[i].count == 1)
6252 {
6253 chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
6254 /* Simplifies algorithms later. */
6255 chars[i].chars[1] = chars[i].chars[0];
6256 }
6257 else if (chars[i].count == 2)
6258 {
6259 SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);
6260
6261 if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
6262 chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
6263 else
6264 chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
6265 }
6266 else
6267 chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
6268 }
6269
6270 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND) && !(defined _WIN64)
6271 if (sljit_has_cpu_feature(SLJIT_HAS_SSE2) && check_fast_forward_char_pair_sse2(common, chars, max))
6272 return TRUE;
6273 #endif
6274
6275 in_range = FALSE;
6276 /* Prevent compiler "uninitialized" warning */
6277 from = 0;
6278 range_len = 4 /* minimum length */ - 1;
6279 for (i = 0; i <= max; i++)
6280 {
6281 if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
6282 {
6283 range_len = i - from;
6284 range_right = i - 1;
6285 }
6286
6287 if (i < max && chars[i].count < 255)
6288 {
6289 SLJIT_ASSERT(chars[i].count > 0);
6290 if (!in_range)
6291 {
6292 in_range = TRUE;
6293 from = i;
6294 }
6295 }
6296 else
6297 in_range = FALSE;
6298 }
6299
6300 if (range_right >= 0)
6301 {
6302 update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
6303 if (update_table == NULL)
6304 return TRUE;
6305 memset(update_table, IN_UCHARS(range_len), 256);
6306
6307 for (i = 0; i < range_len; i++)
6308 {
6309 SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);
6310
6311 char_set = chars[range_right - i].chars;
6312 char_set_end = char_set + chars[range_right - i].count;
6313 do
6314 {
6315 if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
6316 update_table[(*char_set) & 0xff] = IN_UCHARS(i);
6317 char_set++;
6318 }
6319 while (char_set < char_set_end);
6320 }
6321 }
6322
6323 offset = -1;
6324 /* Scan forward. */
6325 for (i = 0; i < max; i++)
6326 {
6327 if (range_right == i)
6328 continue;
6329
6330 if (offset == -1)
6331 {
6332 if (chars[i].last_count >= 2)
6333 offset = i;
6334 }
6335 else if (chars[offset].last_count < chars[i].last_count)
6336 offset = i;
6337 }
6338
6339 SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));
6340
6341 if (range_right < 0)
6342 {
6343 if (offset < 0)
6344 return FALSE;
6345 /* Works regardless the value is 1 or 2. */
6346 fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
6347 return TRUE;
6348 }
6349
6350 SLJIT_ASSERT(range_right != offset);
6351
6352 if (common->match_end_ptr != 0)
6353 {
6354 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6355 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6356 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6357 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
6358 CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
6359 }
6360 else
6361 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6362
6363 SLJIT_ASSERT(range_right >= 0);
6364
6365 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
6366 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
6367 #endif
6368
6369 start = LABEL();
6370 add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6371
6372 #if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
6373 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
6374 #else
6375 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
6376 #endif
6377
6378 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
6379 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
6380 #else
6381 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
6382 #endif
6383 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6384 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
6385
6386 if (offset >= 0)
6387 {
6388 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
6389 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6390
6391 if (chars[offset].count == 1)
6392 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
6393 else
6394 {
6395 mask = chars[offset].chars[0] ^ chars[offset].chars[1];
6396 if (is_powerof2(mask))
6397 {
6398 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
6399 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
6400 }
6401 else
6402 {
6403 match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
6404 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
6405 JUMPHERE(match);
6406 }
6407 }
6408 }
6409
6410 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6411 if (common->utf && offset != 0)
6412 {
6413 if (offset < 0)
6414 {
6415 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6416 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6417 }
6418 else
6419 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6420
6421 jumpto_if_not_utf_char_start(compiler, TMP1, start);
6422
6423 if (offset < 0)
6424 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6425 }
6426 #endif
6427
6428 if (offset >= 0)
6429 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6430
6431 if (common->match_end_ptr != 0)
6432 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6433 else
6434 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6435 return TRUE;
6436 }
6437
fast_forward_first_char(compiler_common * common)6438 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
6439 {
6440 PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
6441 PCRE2_UCHAR oc;
6442
6443 oc = first_char;
6444 if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
6445 {
6446 oc = TABLE_GET(first_char, common->fcc, first_char);
6447 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
6448 if (first_char > 127 && common->utf)
6449 oc = UCD_OTHERCASE(first_char);
6450 #endif
6451 }
6452
6453 fast_forward_first_char2(common, first_char, oc, 0);
6454 }
6455
fast_forward_newline(compiler_common * common)6456 static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
6457 {
6458 DEFINE_COMPILER;
6459 struct sljit_label *loop;
6460 struct sljit_jump *lastchar;
6461 struct sljit_jump *firstchar;
6462 struct sljit_jump *quit;
6463 struct sljit_jump *foundcr = NULL;
6464 struct sljit_jump *notfoundnl;
6465 jump_list *newline = NULL;
6466
6467 if (common->match_end_ptr != 0)
6468 {
6469 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6470 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6471 }
6472
6473 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6474 {
6475 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6476 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6477 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6478 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6479 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6480
6481 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
6482 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
6483 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
6484 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6485 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
6486 #endif
6487 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6488
6489 loop = LABEL();
6490 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6491 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6492 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6493 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6494 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
6495 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
6496
6497 JUMPHERE(quit);
6498 JUMPHERE(firstchar);
6499 JUMPHERE(lastchar);
6500
6501 if (common->match_end_ptr != 0)
6502 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6503 return;
6504 }
6505
6506 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6507 /* Example: match /^/ to \r\n from offset 1. */
6508 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6509 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6510 move_back(common, NULL, FALSE);
6511
6512 loop = LABEL();
6513 common->ff_newline_shortcut = loop;
6514
6515 read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
6516 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6517 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6518 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6519 check_newlinechar(common, common->nltype, &newline, FALSE);
6520 set_jumps(newline, loop);
6521
6522 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6523 {
6524 quit = JUMP(SLJIT_JUMP);
6525 JUMPHERE(foundcr);
6526 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6527 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6528 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
6529 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
6530 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6531 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6532 #endif
6533 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6534 JUMPHERE(notfoundnl);
6535 JUMPHERE(quit);
6536 }
6537 JUMPHERE(lastchar);
6538 JUMPHERE(firstchar);
6539
6540 if (common->match_end_ptr != 0)
6541 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6542 }
6543
6544 static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6545
fast_forward_start_bits(compiler_common * common)6546 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
6547 {
6548 DEFINE_COMPILER;
6549 const sljit_u8 *start_bits = common->re->start_bitmap;
6550 struct sljit_label *start;
6551 struct sljit_jump *partial_quit;
6552 #if PCRE2_CODE_UNIT_WIDTH != 8
6553 struct sljit_jump *found = NULL;
6554 #endif
6555 jump_list *matches = NULL;
6556
6557 if (common->match_end_ptr != 0)
6558 {
6559 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6560 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
6561 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6562 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
6563 CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
6564 }
6565
6566 start = LABEL();
6567
6568 partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6569 if (common->mode == PCRE2_JIT_COMPLETE)
6570 add_jump(compiler, &common->failed_match, partial_quit);
6571
6572 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6573 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6574
6575 if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
6576 {
6577 #if PCRE2_CODE_UNIT_WIDTH != 8
6578 if ((start_bits[31] & 0x80) != 0)
6579 found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
6580 else
6581 CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
6582 #elif defined SUPPORT_UNICODE
6583 if (common->utf && is_char7_bitset(start_bits, FALSE))
6584 CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
6585 #endif
6586 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6587 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6588 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
6589 if (sljit_get_register_index(TMP3) >= 0)
6590 {
6591 OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
6592 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP3, 0);
6593 }
6594 else
6595 {
6596 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6597 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6598 }
6599 JUMPTO(SLJIT_ZERO, start);
6600 }
6601 else
6602 set_jumps(matches, start);
6603
6604 #if PCRE2_CODE_UNIT_WIDTH != 8
6605 if (found != NULL)
6606 JUMPHERE(found);
6607 #endif
6608
6609 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6610
6611 if (common->mode != PCRE2_JIT_COMPLETE)
6612 JUMPHERE(partial_quit);
6613
6614 if (common->match_end_ptr != 0)
6615 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
6616 }
6617
search_requested_char(compiler_common * common,PCRE2_UCHAR req_char,BOOL caseless,BOOL has_firstchar)6618 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
6619 {
6620 DEFINE_COMPILER;
6621 struct sljit_label *loop;
6622 struct sljit_jump *toolong;
6623 struct sljit_jump *alreadyfound;
6624 struct sljit_jump *found;
6625 struct sljit_jump *foundoc = NULL;
6626 struct sljit_jump *notfound;
6627 sljit_u32 oc, bit;
6628
6629 SLJIT_ASSERT(common->req_char_ptr != 0);
6630 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
6631 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_CU_MAX);
6632 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
6633 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
6634
6635 if (has_firstchar)
6636 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6637 else
6638 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
6639
6640 loop = LABEL();
6641 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
6642
6643 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
6644 oc = req_char;
6645 if (caseless)
6646 {
6647 oc = TABLE_GET(req_char, common->fcc, req_char);
6648 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
6649 if (req_char > 127 && common->utf)
6650 oc = UCD_OTHERCASE(req_char);
6651 #endif
6652 }
6653 if (req_char == oc)
6654 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6655 else
6656 {
6657 bit = req_char ^ oc;
6658 if (is_powerof2(bit))
6659 {
6660 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
6661 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
6662 }
6663 else
6664 {
6665 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6666 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
6667 }
6668 }
6669 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6670 JUMPTO(SLJIT_JUMP, loop);
6671
6672 JUMPHERE(found);
6673 if (foundoc)
6674 JUMPHERE(foundoc);
6675 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
6676 JUMPHERE(alreadyfound);
6677 JUMPHERE(toolong);
6678 return notfound;
6679 }
6680
/* Emits the shared fast-call routine that unwinds saved values from the
backtracking stack. The word just below STACK_TOP selects the action:

  > 0  it is an offset from the local base; the two words below it are
       written to that location and the following word, and three words
       are popped;
  = 0  end of the list: return to RETURN_ADDR;
  < 0  its negation is an offset from the local base; the single word
       below it is written there and two words are popped.

TMP1 holds the local base between entries; TMP2 (and TMP3 when it has a
register index) are scratch. */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *next_entry;
struct sljit_jump *not_positive;
struct sljit_jump *negative;
/* A negative index means TMP3 cannot be used as a plain register here, so
memory to memory moves are emitted instead. */
BOOL no_tmp3_reg = (sljit_get_register_index(TMP3) < 0);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
GET_LOCAL_BASE(TMP1, 0, 0);

/* Process entries until the zero terminator is reached. */
next_entry = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
not_positive = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);

/* Positive selector: restore two consecutive words. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (no_tmp3_reg)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
  }
else
  {
  /* TMP1 is borrowed as a second scratch register, so the local base has
  to be reloaded before the next entry. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  GET_LOCAL_BASE(TMP1, 0, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, next_entry);

JUMPHERE(not_positive);
/* The value is known to be <= 0 here, so "not zero" means negative. */
negative = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
/* Zero selector: nothing more to restore. */
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Negative selector: restore a single word. */
JUMPHERE(negative);
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (no_tmp3_reg)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
  }
else
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, next_entry);
}
6734
#ifdef SUPPORT_UNICODE
/* Helper of check_wordboundary(): emits the Unicode property based word
test for the character in TMP1. TMP2 becomes 1 when the character is an
underscore, or when the value left in TMP1 by the routine reached through
common->getucdtype lies in ucp_Ll..ucp_Lu or in ucp_Nd..ucp_No; otherwise it
becomes 0. TMP1 is overwritten. */
static void check_wordboundary_ucp_type(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *underscore;

OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
underscore = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
JUMPHERE(underscore);
}
#endif /* SUPPORT_UNICODE */

/* Emits the shared fast-call routine behind the word boundary assertions.
It classifies the character before STR_PTR (result in TMP3) and the character
at STR_PTR (result in TMP2) as word / non-word, where a missing neighbour
counts as non-word, and returns with TMP2 = TMP2 XOR TMP3 and the zero flag
set accordingly. The return address is kept in LOCALS0.

When common->invalid_utf is set, malformed input makes the routine return
with TMP2 = -1 instead. */
static void check_wordboundary(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *no_prev;
jump_list *no_next = NULL;
jump_list *bad_utf = NULL;
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
struct sljit_jump *above_255;
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */

SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Classify the previous character; TMP3 stays 0 when there is none. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
no_prev = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

if (common->mode == PCRE2_JIT_COMPLETE)
  peek_char_back(common, READ_CHAR_MAX, &bad_utf);
else
  {
  move_back(common, &bad_utf, FALSE);
  check_start_used_ptr(common);
  /* No need precise read since match fails anyway. */
  read_char(common, 0, READ_CHAR_MAX, &bad_utf, READ_CHAR_UPDATE_STR_PTR);
  }

/* Word test for the previous character. */
#ifdef SUPPORT_UNICODE
if (common->use_ucp)
  {
  check_wordboundary_ucp_type(common);
  OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
  }
else
#endif /* SUPPORT_UNICODE */
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  above_255 = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  /* Here TMP3 has already been zeroed. */
  above_255 = NULL;
  if (common->utf)
    above_255 = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  /* Fetch the ctype byte and isolate the ctype_word bit (0x10). */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(above_255);
#elif defined SUPPORT_UNICODE
  if (above_255 != NULL)
    JUMPHERE(above_255);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
JUMPHERE(no_prev);

/* Classify the current character; TMP2 stays 0 at the end of the subject. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &no_next);
peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &bad_utf);

/* Word test for the current character. */
#ifdef SUPPORT_UNICODE
if (common->use_ucp)
  check_wordboundary_ucp_type(common);
else
#endif /* SUPPORT_UNICODE */
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  above_255 = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  above_255 = NULL;
  if (common->utf)
    above_255 = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(above_255);
#elif defined SUPPORT_UNICODE
  if (above_255 != NULL)
    JUMPHERE(above_255);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
set_jumps(no_next, LABEL());

/* Combine both results; a non-zero TMP2 means the two sides differ. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
sljit_emit_fast_return(compiler, TMP1, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  SLJIT_ASSERT(bad_utf != NULL);

  /* Separate exit for malformed UTF input. */
  set_jumps(bad_utf, LABEL());
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
  sljit_emit_fast_return(compiler, TMP1, 0);
  }
#endif /* SUPPORT_UNICODE */
}
6864
static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Scans the class bitmap for the positions where membership flips. When at
most four such positions exist, the class is tested with a few comparisons on
TMP1 (failing characters are added to backtracks) and TRUE is returned. In
every other case FALSE is returned before any code has been emitted.
May destroy TMP1. */
DEFINE_COMPILER;
int bounds[MAX_CLASS_RANGE_SIZE];
sljit_u8 state, sample, uniform;
int pos, slot, span, base;
int count = 0;

state = bits[0] & 0x1;
/* 0x00 or 0xff: the value of a bitmap byte in which nothing flips. */
uniform = -state;

pos = 0;
while (pos < 256)
  {
  slot = pos >> 3;
  if ((pos & 0x7) == 0 && bits[slot] == uniform)
    {
    /* At a byte boundary and the whole byte keeps the current state. */
    pos += 8;
    continue;
    }

  sample = (bits[slot] >> (pos & 0x7)) & 0x1;
  if (sample != state)
    {
    if (count >= MAX_CLASS_RANGE_SIZE)
      return FALSE;
    bounds[count++] = pos;
    state = sample;
    uniform = -sample;
    }
  pos++;
  }

/* A closing boundary at 256 is recorded when the state after the last bit is
0 for nclass, or 1 otherwise. */
if (nclass ? (state == 0) : (state == 1))
  {
  if (count >= MAX_CLASS_RANGE_SIZE)
    return FALSE;
  bounds[count++] = 256;
  }

if (count > 4 || count < 0)
  return FALSE;

/* Bit of character 0, flipped when invert is set. */
state = bits[0] & 0x1;
if (invert)
  state ^= 0x1;

if (count == 0)
  {
  /* No boundary at all: either no character is accepted (state == 0), or
  all characters are accepted and nothing has to be checked. */
  if (state == 0)
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  return TRUE;
  }

switch(count)
  {
  case 1:
  add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_GREATER_EQUAL : SLJIT_LESS, TMP1, 0, SLJIT_IMM, bounds[0]));
  return TRUE;

  case 2:
  if (bounds[0] + 1 == bounds[1])
    add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, bounds[0]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, bounds[0]);
    add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, bounds[1] - bounds[0]));
    }
  return TRUE;

  case 3:
  if (state == 0)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, bounds[0]));
    if (bounds[1] + 1 == bounds[2])
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, bounds[1]));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, bounds[1]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, bounds[2] - bounds[1]));
      }
    return TRUE;
    }

  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, bounds[2]));
  if (bounds[0] + 1 == bounds[1])
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, bounds[0]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, bounds[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, bounds[1] - bounds[0]));
    }
  return TRUE;

  case 4:
  /* Two ranges of equal size whose start values differ in exactly one bit
  are merged into a single range test by setting that bit in TMP1. */
  span = bounds[2] - bounds[0];
  if ((bounds[1] - bounds[0]) == (bounds[3] - bounds[2])
      && (bounds[0] | span) == bounds[2]
      && (bounds[1] & span) == 0
      && is_powerof2(span))
    {
    SLJIT_ASSERT((bounds[0] & span) == 0 && (bounds[2] & bounds[3] & span) != 0);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, span);
    if (bounds[2] + 1 == bounds[3])
      add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, bounds[2]));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, bounds[2]);
      add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, bounds[3] - bounds[2]));
      }
    return TRUE;
    }

  if (state == 0)
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, bounds[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, bounds[3] - bounds[0]));
    if (bounds[1] + 1 == bounds[2])
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, bounds[1] - bounds[0]));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, bounds[1] - bounds[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, bounds[2] - bounds[1]));
      }
    return TRUE;
    }

  /* base is the amount already subtracted from TMP1 by the first test. */
  base = 0;
  if (bounds[0] + 1 == bounds[1])
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, bounds[0]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, bounds[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, bounds[1] - bounds[0]));
    base = bounds[0];
    }

  if (bounds[2] + 1 == bounds[3])
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, bounds[2] - base));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, bounds[2] - base);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, bounds[3] - bounds[2]));
    }
  return TRUE;

  default:
  SLJIT_UNREACHABLE();
  return FALSE;
  }
}
7016
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Collects the characters whose bit is set in the 32 byte bitmap (or clear,
when nclass is set) and compares TMP1 with each of them, updating TMP2 with a
conditional move after every comparison. A character with bit 0x20 set whose
partner (the same value without 0x20) is already listed is merged into that
entry; merged entries are compared after 0x20 has been OR-ed into TMP1.
Returns FALSE without emitting code when conditional moves are unavailable or
more than MAX_CLASS_CHARS_SIZE entries would be needed.
May destroy TMP1. */
DEFINE_COMPILER;
uint16_t chars[MAX_CLASS_CHARS_SIZE];
uint8_t pending;
sljit_s32 cond;
int idx, bitno, slot, count, ch, pairs;

if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  return FALSE;

count = 0;

for (idx = 0; idx < 32; idx++)
  {
  pending = bits[idx];
  if (nclass)
    pending = ~pending;

  for (bitno = 0; pending != 0; pending >>= 1, bitno++)
    {
    if ((pending & 0x1) == 0)
      continue;

    ch = idx * 8 + bitno;

    /* slot == count afterwards means that no partner entry was found. */
    slot = count;
    if ((ch & 0x20) != 0)
      {
      for (slot = 0; slot < count; slot++)
        if (chars[slot] == ch - 0x20)
          {
          /* 0x100 marks a merged entry, 0x20 makes it match the OR-ed TMP1. */
          chars[slot] |= 0x120;
          break;
          }
      }

    if (slot != count)
      continue;

    if (count >= MAX_CLASS_CHARS_SIZE)
      return FALSE;

    chars[count++] = (uint16_t) ch;
    }
  }

if (count == 0) return FALSE; /* Should never occur, but stops analyzers complaining. */

pairs = 0;

/* TMP2 starts as the result of the test against character 0 when that is
the first entry, and as zero otherwise. */
if (chars[0] == 0)
  {
  idx = 1;
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
  }
else
  {
  idx = 0;
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  }

/* First pass: the entries which were not merged. */
for (; idx < count; idx++)
  {
  if ((chars[idx] & 0x100) != 0)
    {
    pairs++;
    continue;
    }

  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, chars[idx]);
  CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
  }

/* Second pass: merged entries, compared with bit 0x20 forced in TMP1. */
if (pairs != 0)
  {
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);

  for (idx = 0; idx < count; idx++)
    {
    if ((chars[idx] & 0x100) == 0)
      continue;

    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, chars[idx] & 0xff);
    CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
    }
  }

if (invert)
  nclass = !nclass;

cond = nclass ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
add_jump(compiler, backtracks, CMP(cond, TMP2, 0, SLJIT_IMM, 0));
return TRUE;
}
7117
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Tries the range based class check first and falls back to the character
list based one. Returns TRUE when either of them emitted the check.
May destroy TMP1. */
BOOL emitted = optimize_class_ranges(common, bits, nclass, invert, backtracks);

if (!emitted)
  emitted = optimize_class_chars(common, bits, nclass, invert, backtracks);
return emitted;
}
7125
static void check_anynewline(compiler_common *common)
{
/* Subroutine: checks whether TMP1 contains a newline character, that is
0x0a-0x0d or 0x85, plus 0x2028-0x2029 when wide characters are possible. The
verdict is collected in TMP2 and the last OP_FLAGS is emitted with SLJIT_SET_Z.
TMP1 is modified, TMP2 destroyed. */
DEFINE_COMPILER;
/* Non-zero when the checks for characters above 0xff must be emitted. */
#if PCRE2_CODE_UNIT_WIDTH == 8 && defined SUPPORT_UNICODE
const int wide_chars = common->utf;
#elif defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
const int wide_chars = 1;
#else
const int wide_chars = 0;
#endif

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* After subtracting 0x0a the range 0x0a-0x0d is a single "<= 3" test. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

/* 0x85: the flag set here is consumed by the next OP_FLAGS emitted. */
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);

if (wide_chars)
  {
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit maps 0x2028 - 0x0a onto 0x2029 - 0x0a. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
  }

OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
7152
static void check_hspace(compiler_common *common)
{
/* Subroutine: checks whether TMP1 contains a horizontal white space
character, that is 0x09, 0x20 or 0xa0, plus 0x1680, 0x180e, 0x2000-0x200a,
0x202f, 0x205f and 0x3000 when wide characters are possible. The verdict is
collected in TMP2 and the last OP_FLAGS is emitted with SLJIT_SET_Z.
TMP1 may be modified, TMP2 destroyed. */
DEFINE_COMPILER;
/* Non-zero when the checks for characters above 0xff must be emitted. */
#if PCRE2_CODE_UNIT_WIDTH == 8 && defined SUPPORT_UNICODE
const int wide_chars = common->utf;
#elif defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
const int wide_chars = 1;
#else
const int wide_chars = 0;
#endif

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

/* 0xa0: the flag set here is consumed by the next OP_FLAGS emitted. */
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);

if (wide_chars)
  {
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

  /* From here on TMP1 holds the character minus 0x2000. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
  }

OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
7191
static void check_vspace(compiler_common *common)
{
/* Subroutine: checks whether TMP1 contains a vertical white space character,
that is 0x0a-0x0d or 0x85, plus 0x2028-0x2029 when wide characters are
possible. The verdict is collected in TMP2 and the last OP_FLAGS is emitted
with SLJIT_SET_Z. TMP1 is modified, TMP2 destroyed. */
DEFINE_COMPILER;
/* Non-zero when the checks for characters above 0xff must be emitted. */
#if PCRE2_CODE_UNIT_WIDTH == 8 && defined SUPPORT_UNICODE
const int wide_chars = common->utf;
#elif defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
const int wide_chars = 1;
#else
const int wide_chars = 0;
#endif

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* After subtracting 0x0a the range 0x0a-0x0d is a single "<= 3" test. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

/* 0x85: the flag set here is consumed by the next OP_FLAGS emitted. */
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);

if (wide_chars)
  {
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit maps 0x2028 - 0x0a onto 0x2029 - 0x0a. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
  }

OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
7219
static void do_casefulcmp(compiler_common *common)
{
/* Subroutine comparing two code unit sequences exactly. One sequence is read
through TMP1, the other through STR_PTR after TMP2 has been subtracted from
it. TMP2 is decreased by IN_UCHARS(1) after every equal pair, and the loop
ends at the first difference or when TMP2 becomes zero. The return address is
kept in LOCALS0 and reloaded into TMP1 for the fast return. */
DEFINE_COMPILER;
struct sljit_jump *differ;
struct sljit_label *loop;
int char1_reg, char2_reg;
int load_mode; /* 1: post-update loads, 2: pre-update loads, 0: plain loads. */

/* When sljit_get_register_index(TMP3) is negative, STR_END and STACK_TOP
receive the characters and their values are parked in TMP3 and RETURN_ADDR
for the duration of the loop. */
if (sljit_get_register_index(TMP3) >= 0)
  {
  char1_reg = TMP3;
  char2_reg = RETURN_ADDR;
  }
else
  {
  char1_reg = STR_END;
  char2_reg = STACK_TOP;
  }

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

if (char1_reg == STR_END)
  {
  OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
  }

/* The SLJIT_MEM_SUPP calls act as probes: their result selects which kind
of load instruction is used inside the loop. */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  load_mode = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  load_mode = 2;
else
  load_mode = 0;

if (load_mode == 2)
  {
  /* Pre-update loads: both pointers start one code unit earlier. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

loop = LABEL();
switch(load_mode)
  {
  case 1:
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

differ = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop);

JUMPHERE(differ);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

if (load_mode == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char1_reg == STR_END)
  {
  OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
  }

sljit_emit_fast_return(compiler, TMP1, 0);
}
7299
static void do_caselesscmp(compiler_common *common)
{
/* Subroutine comparing two code unit sequences after mapping every code unit
through the common->lcc table (code units above 255 are left unchanged when
the code unit width is not 8). One sequence is read through TMP1, the other
through STR_PTR after TMP2 has been subtracted from it. TMP2 is decreased by
IN_UCHARS(1) after every equal pair, and the loop ends at the first difference
or when TMP2 becomes zero. The return address is kept in LOCALS0 and the
original value of STR_END in LOCALS1. */
DEFINE_COMPILER;
struct sljit_jump *differ;
#if PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *skip_table;
#endif
struct sljit_label *loop;
int first_reg = STR_END;
int second_reg;
int table_reg;
int load_mode = 0; /* 1: post-update loads, 2: pre-update loads, 0: plain loads. */

/* When sljit_get_register_index(TMP3) is negative, STACK_TOP and
STACK_LIMIT are used and their values are parked in TMP3 and RETURN_ADDR. */
if (sljit_get_register_index(TMP3) >= 0)
  {
  second_reg = RETURN_ADDR;
  table_reg = TMP3;
  }
else
  {
  second_reg = STACK_TOP;
  table_reg = STACK_LIMIT;
  }

/* The SLJIT_MEM_SUPP calls act as probes: their result selects which kind
of load instruction is used inside the loop. */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, first_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  load_mode = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, first_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  load_mode = 2;

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, first_reg, 0);

if (second_reg == STACK_TOP)
  {
  OP1(SLJIT_MOV, TMP3, 0, second_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, table_reg, 0);
  }

OP1(SLJIT_MOV, table_reg, 0, SLJIT_IMM, common->lcc);

switch(load_mode)
  {
  case 1:
  loop = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, first_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, second_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  /* Pre-update loads: both pointers start one code unit earlier. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  loop = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, first_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, second_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  /* STR_PTR is advanced later, after the table lookups. */
  loop = LABEL();
  OP1(MOV_UCHAR, first_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, second_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

#if PCRE2_CODE_UNIT_WIDTH != 8
skip_table = CMP(SLJIT_GREATER, first_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, first_reg, 0, SLJIT_MEM2(table_reg, first_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(skip_table);
skip_table = CMP(SLJIT_GREATER, second_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, second_reg, 0, SLJIT_MEM2(table_reg, second_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(skip_table);
#endif

if (load_mode == 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

differ = CMP(SLJIT_NOT_EQUAL, first_reg, 0, second_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop);

JUMPHERE(differ);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

if (load_mode == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (second_reg == STACK_TOP)
  {
  OP1(SLJIT_MOV, second_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, table_reg, 0, RETURN_ADDR, 0);
  }

OP1(SLJIT_MOV, first_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, TMP1, 0);
}
7397
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
    compare_context *context, jump_list **backtracks)
{
/* Emits the comparison of one pattern character at cc (a single code unit,
or 1 + GET_EXTRALEN(*cc) code units in UTF mode) with the subject data at
STR_PTR - context->length. A mismatch jumps to backtracks. For a caseless
character that has an other case, the differing bit is OR-ed into the subject
value before it is compared. context->length is reduced by IN_UCHARS(1) for
each code unit, and the returned pointer is cc advanced past the character.

The subject is loaded one step ahead: context->sourcereg names the register
that receives the next load, while the other one of TMP1 / TMP2 is compared.
A sourcereg of -1 means that nothing has been loaded yet. */
DEFINE_COMPILER;
unsigned int case_bit = 0;
unsigned int unit_mask;
PCRE2_SPTR case_unit = NULL;
#ifdef SUPPORT_UNICODE
int units_left;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  case_bit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(case_bit);
  /* Extracting bit difference info: the upper part of the value selects the
  code unit which contains the differing bit. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  case_unit = cc + (case_bit >> 8);
  case_bit &= 0xff;
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for PCRE2_CODE_UNIT_WIDTH == 32. */
  case_unit = cc + (case_bit >> 9);
  case_bit = ((case_bit & 0x100) != 0) ? (case_bit & 0xff) << 8 : (case_bit & 0xff);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  }

if (context->sourcereg == -1)
  {
  /* First call for this sequence: load the first chunk into TMP1. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UNICODE
units_left = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  units_left += GET_EXTRALEN(*cc);

do
  {
#endif

  context->length -= IN_UCHARS(1);
  /* The case bit applies only to the code unit selected above. */
  unit_mask = (case_bit != 0 && case_unit == cc) ? case_bit : 0;
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)

  /* Unaligned read is supported: code units are collected in context->c
  (expected value) and context->oc (case mask) and compared in chunks. */
  context->c.asuchars[context->ucharptr] = *cc | unit_mask;
  context->oc.asuchars[context->ucharptr] = unit_mask;
  context->ucharptr++;

#if PCRE2_CODE_UNIT_WIDTH == 8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Load the next chunk ahead, then compare the chunk loaded earlier. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if PCRE2_CODE_UNIT_WIDTH == 8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */

    if (context->sourcereg == TMP1)
      context->sourcereg = TMP2;
    else
      context->sourcereg = TMP1;

    switch(context->ucharptr)
      {
      case 4 / sizeof(PCRE2_UCHAR):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(PCRE2_UCHAR):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#if PCRE2_CODE_UNIT_WIDTH == 8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported or in 32 bit mode: load the next code
  unit ahead, then compare the one loaded earlier. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  if (context->sourcereg == TMP1)
    context->sourcereg = TMP2;
  else
    context->sourcereg = TMP1;

  if (unit_mask != 0)
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, unit_mask);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | unit_mask));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));

#endif

  cc++;
#ifdef SUPPORT_UNICODE
  units_left--;
  }
while (units_left > 0);
#endif

return cc;
}
7550
7551 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
7552
/* Emits the addition or subtraction which changes the offset already
subtracted from typereg from typeoffset to value, then records value as the
new typeoffset. Nothing is emitted when the offset is unchanged. The caller
must have typereg, typeoffset and compiler in scope. The argument is evaluated
several times, so it must be free of side effects. The body is wrapped in
do / while (0) so the macro behaves as one statement, also as the unbraced
body of an if / else. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)
7562
/* Emits the addition or subtraction which changes the offset already
subtracted from TMP1 from charoffset to value, then records value as the new
charoffset. Nothing is emitted when the offset is unchanged. The caller must
have charoffset and compiler in scope. The argument is evaluated several
times, so it must be free of side effects. The body is wrapped in
do / while (0) so the macro behaves as one statement, also as the unbraced
body of an if / else. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
7572
/* Forward declaration: compile_xclass_matchingpath() calls this function
(for PT_ANY) before its definition appears in the file. */
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
7574
compile_xclass_matchingpath(compiler_common * common,PCRE2_SPTR cc,jump_list ** backtracks)7575 static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
7576 {
7577 DEFINE_COMPILER;
7578 jump_list *found = NULL;
7579 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
7580 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
7581 struct sljit_jump *jump = NULL;
7582 PCRE2_SPTR ccbegin;
7583 int compares, invertcmp, numberofcmps;
7584 #if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
7585 BOOL utf = common->utf;
7586 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
7587
7588 #ifdef SUPPORT_UNICODE
7589 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
7590 BOOL charsaved = FALSE;
7591 int typereg = TMP1;
7592 const sljit_u32 *other_cases;
7593 sljit_uw typeoffset;
7594 #endif /* SUPPORT_UNICODE */
7595
7596 /* Scanning the necessary info. */
7597 cc++;
7598 ccbegin = cc;
7599 compares = 0;
7600
7601 if (cc[-1] & XCL_MAP)
7602 {
7603 min = 0;
7604 cc += 32 / sizeof(PCRE2_UCHAR);
7605 }
7606
7607 while (*cc != XCL_END)
7608 {
7609 compares++;
7610 if (*cc == XCL_SINGLE)
7611 {
7612 cc ++;
7613 GETCHARINCTEST(c, cc);
7614 if (c > max) max = c;
7615 if (c < min) min = c;
7616 #ifdef SUPPORT_UNICODE
7617 needschar = TRUE;
7618 #endif /* SUPPORT_UNICODE */
7619 }
7620 else if (*cc == XCL_RANGE)
7621 {
7622 cc ++;
7623 GETCHARINCTEST(c, cc);
7624 if (c < min) min = c;
7625 GETCHARINCTEST(c, cc);
7626 if (c > max) max = c;
7627 #ifdef SUPPORT_UNICODE
7628 needschar = TRUE;
7629 #endif /* SUPPORT_UNICODE */
7630 }
7631 #ifdef SUPPORT_UNICODE
7632 else
7633 {
7634 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7635 cc++;
7636 if (*cc == PT_CLIST)
7637 {
7638 other_cases = PRIV(ucd_caseless_sets) + cc[1];
7639 while (*other_cases != NOTACHAR)
7640 {
7641 if (*other_cases > max) max = *other_cases;
7642 if (*other_cases < min) min = *other_cases;
7643 other_cases++;
7644 }
7645 }
7646 else
7647 {
7648 max = READ_CHAR_MAX;
7649 min = 0;
7650 }
7651
7652 switch(*cc)
7653 {
7654 case PT_ANY:
7655 /* Any either accepts everything or ignored. */
7656 if (cc[-1] == XCL_PROP)
7657 {
7658 compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
7659 if (list == backtracks)
7660 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7661 return;
7662 }
7663 break;
7664
7665 case PT_LAMP:
7666 case PT_GC:
7667 case PT_PC:
7668 case PT_ALNUM:
7669 needstype = TRUE;
7670 break;
7671
7672 case PT_SC:
7673 needsscript = TRUE;
7674 break;
7675
7676 case PT_SPACE:
7677 case PT_PXSPACE:
7678 case PT_WORD:
7679 case PT_PXGRAPH:
7680 case PT_PXPRINT:
7681 case PT_PXPUNCT:
7682 needstype = TRUE;
7683 needschar = TRUE;
7684 break;
7685
7686 case PT_CLIST:
7687 case PT_UCNC:
7688 needschar = TRUE;
7689 break;
7690
7691 default:
7692 SLJIT_UNREACHABLE();
7693 break;
7694 }
7695 cc += 2;
7696 }
7697 #endif /* SUPPORT_UNICODE */
7698 }
7699 SLJIT_ASSERT(compares > 0);
7700
7701 /* We are not necessary in utf mode even in 8 bit mode. */
7702 cc = ccbegin;
7703 if ((cc[-1] & XCL_NOT) != 0)
7704 read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
7705 else
7706 read_char(common, min, max, NULL, 0);
7707
7708 if ((cc[-1] & XCL_HASPROP) == 0)
7709 {
7710 if ((cc[-1] & XCL_MAP) != 0)
7711 {
7712 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7713 if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
7714 {
7715 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7716 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7717 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7718 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7719 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
7720 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
7721 }
7722
7723 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7724 JUMPHERE(jump);
7725
7726 cc += 32 / sizeof(PCRE2_UCHAR);
7727 }
7728 else
7729 {
7730 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
7731 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
7732 }
7733 }
7734 else if ((cc[-1] & XCL_MAP) != 0)
7735 {
7736 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7737 #ifdef SUPPORT_UNICODE
7738 charsaved = TRUE;
7739 #endif /* SUPPORT_UNICODE */
7740 if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
7741 {
7742 #if PCRE2_CODE_UNIT_WIDTH == 8
7743 jump = NULL;
7744 if (common->utf)
7745 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7746 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7747
7748 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7749 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7750 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7751 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7752 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
7753 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
7754
7755 #if PCRE2_CODE_UNIT_WIDTH == 8
7756 if (common->utf)
7757 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7758 JUMPHERE(jump);
7759 }
7760
7761 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7762 cc += 32 / sizeof(PCRE2_UCHAR);
7763 }
7764
7765 #ifdef SUPPORT_UNICODE
7766 if (needstype || needsscript)
7767 {
7768 if (needschar && !charsaved)
7769 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7770
7771 #if PCRE2_CODE_UNIT_WIDTH == 32
7772 if (!common->utf)
7773 {
7774 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
7775 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
7776 JUMPHERE(jump);
7777 }
7778 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
7779
7780 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7781 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
7782 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
7783 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
7784 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7785 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7786 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
7787 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
7788
7789 /* Before anything else, we deal with scripts. */
7790 if (needsscript)
7791 {
7792 // PH hacking
7793 //fprintf(stderr, "~~B\n");
7794
7795 OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
7796 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
7797 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
7798
7799 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
7800
7801 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 0);
7802
7803 // OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
7804
7805 ccbegin = cc;
7806
7807 while (*cc != XCL_END)
7808 {
7809 if (*cc == XCL_SINGLE)
7810 {
7811 cc ++;
7812 GETCHARINCTEST(c, cc);
7813 }
7814 else if (*cc == XCL_RANGE)
7815 {
7816 cc ++;
7817 GETCHARINCTEST(c, cc);
7818 GETCHARINCTEST(c, cc);
7819 }
7820 else
7821 {
7822 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7823 cc++;
7824 if (*cc == PT_SC)
7825 {
7826 compares--;
7827 invertcmp = (compares == 0 && list != backtracks);
7828 if (cc[-1] == XCL_NOTPROP)
7829 invertcmp ^= 0x1;
7830 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
7831 add_jump(compiler, compares > 0 ? list : backtracks, jump);
7832 }
7833 cc += 2;
7834 }
7835 }
7836
7837 cc = ccbegin;
7838 }
7839
7840 if (needschar)
7841 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7842
7843 if (needstype)
7844 {
7845 if (!needschar)
7846 {
7847 // PH hacking
7848 //fprintf(stderr, "~~C\n");
7849 OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
7850 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
7851 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
7852 OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP1, 0);
7853
7854 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
7855
7856 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 0);
7857
7858 // OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
7859 }
7860 else
7861 {
7862 // PH hacking
7863 //fprintf(stderr, "~~D\n");
7864 OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
7865
7866 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
7867
7868 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
7869 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7870
7871 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
7872 typereg = RETURN_ADDR;
7873 }
7874 }
7875 }
7876 #endif /* SUPPORT_UNICODE */
7877
7878 /* Generating code. */
7879 charoffset = 0;
7880 numberofcmps = 0;
7881 #ifdef SUPPORT_UNICODE
7882 typeoffset = 0;
7883 #endif /* SUPPORT_UNICODE */
7884
7885 while (*cc != XCL_END)
7886 {
7887 compares--;
7888 invertcmp = (compares == 0 && list != backtracks);
7889 jump = NULL;
7890
7891 if (*cc == XCL_SINGLE)
7892 {
7893 cc ++;
7894 GETCHARINCTEST(c, cc);
7895
7896 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
7897 {
7898 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7899 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7900 numberofcmps++;
7901 }
7902 else if (numberofcmps > 0)
7903 {
7904 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7905 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7906 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7907 numberofcmps = 0;
7908 }
7909 else
7910 {
7911 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7912 numberofcmps = 0;
7913 }
7914 }
7915 else if (*cc == XCL_RANGE)
7916 {
7917 cc ++;
7918 GETCHARINCTEST(c, cc);
7919 SET_CHAR_OFFSET(c);
7920 GETCHARINCTEST(c, cc);
7921
7922 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
7923 {
7924 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7925 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
7926 numberofcmps++;
7927 }
7928 else if (numberofcmps > 0)
7929 {
7930 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7931 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
7932 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7933 numberofcmps = 0;
7934 }
7935 else
7936 {
7937 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7938 numberofcmps = 0;
7939 }
7940 }
7941 #ifdef SUPPORT_UNICODE
7942 else
7943 {
7944 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7945 if (*cc == XCL_NOTPROP)
7946 invertcmp ^= 0x1;
7947 cc++;
7948 switch(*cc)
7949 {
7950 case PT_ANY:
7951 if (!invertcmp)
7952 jump = JUMP(SLJIT_JUMP);
7953 break;
7954
7955 case PT_LAMP:
7956 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
7957 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7958 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
7959 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7960 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
7961 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7962 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7963 break;
7964
7965 case PT_GC:
7966 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
7967 SET_TYPE_OFFSET(c);
7968 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
7969 break;
7970
7971 case PT_PC:
7972 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
7973 break;
7974
7975 case PT_SC:
7976 compares++;
7977 /* Do nothing. */
7978 break;
7979
7980 case PT_SPACE:
7981 case PT_PXSPACE:
7982 SET_CHAR_OFFSET(9);
7983 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
7984 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7985
7986 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
7987 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7988
7989 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
7990 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7991
7992 SET_TYPE_OFFSET(ucp_Zl);
7993 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
7994 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
7995 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7996 break;
7997
7998 case PT_WORD:
7999 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
8000 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8001 /* Fall through. */
8002
8003 case PT_ALNUM:
8004 SET_TYPE_OFFSET(ucp_Ll);
8005 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
8006 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8007 SET_TYPE_OFFSET(ucp_Nd);
8008 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
8009 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
8010 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8011 break;
8012
8013 case PT_CLIST:
8014 other_cases = PRIV(ucd_caseless_sets) + cc[1];
8015
8016 /* At least three characters are required.
8017 Otherwise this case would be handled by the normal code path. */
8018 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
8019 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
8020
8021 /* Optimizing character pairs, if their difference is power of 2. */
8022 if (is_powerof2(other_cases[1] ^ other_cases[0]))
8023 {
8024 if (charoffset == 0)
8025 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8026 else
8027 {
8028 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
8029 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8030 }
8031 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
8032 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8033 other_cases += 2;
8034 }
8035 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
8036 {
8037 if (charoffset == 0)
8038 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
8039 else
8040 {
8041 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
8042 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8043 }
8044 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
8045 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8046
8047 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
8048 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
8049
8050 other_cases += 3;
8051 }
8052 else
8053 {
8054 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
8055 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8056 }
8057
8058 while (*other_cases != NOTACHAR)
8059 {
8060 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
8061 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
8062 }
8063 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8064 break;
8065
8066 case PT_UCNC:
8067 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
8068 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8069 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
8070 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8071 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
8072 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8073
8074 SET_CHAR_OFFSET(0xa0);
8075 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
8076 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8077 SET_CHAR_OFFSET(0);
8078 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
8079 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
8080 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8081 break;
8082
8083 case PT_PXGRAPH:
8084 /* C and Z groups are the farthest two groups. */
8085 SET_TYPE_OFFSET(ucp_Ll);
8086 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
8087 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
8088
8089 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
8090
8091 /* In case of ucp_Cf, we overwrite the result. */
8092 SET_CHAR_OFFSET(0x2066);
8093 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
8094 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8095
8096 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
8097 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8098
8099 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
8100 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8101
8102 JUMPHERE(jump);
8103 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
8104 break;
8105
8106 case PT_PXPRINT:
8107 /* C and Z groups are the farthest two groups. */
8108 SET_TYPE_OFFSET(ucp_Ll);
8109 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
8110 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
8111
8112 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
8113 OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);
8114
8115 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
8116
8117 /* In case of ucp_Cf, we overwrite the result. */
8118 SET_CHAR_OFFSET(0x2066);
8119 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
8120 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8121
8122 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
8123 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8124
8125 JUMPHERE(jump);
8126 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
8127 break;
8128
8129 case PT_PXPUNCT:
8130 SET_TYPE_OFFSET(ucp_Sc);
8131 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
8132 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8133
8134 SET_CHAR_OFFSET(0);
8135 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
8136 OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
8137
8138 SET_TYPE_OFFSET(ucp_Pc);
8139 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
8140 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
8141 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8142 break;
8143
8144 default:
8145 SLJIT_UNREACHABLE();
8146 break;
8147 }
8148 cc += 2;
8149 }
8150 #endif /* SUPPORT_UNICODE */
8151
8152 if (jump != NULL)
8153 add_jump(compiler, compares > 0 ? list : backtracks, jump);
8154 }
8155
8156 if (found != NULL)
8157 set_jumps(found, LABEL());
8158 }
8159
8160 #undef SET_TYPE_OFFSET
8161 #undef SET_CHAR_OFFSET
8162
8163 #endif
8164
/* Emits the matching-path JIT code for one simple assertion opcode: OP_SOD,
OP_SOM, OP_(NOT_)WORD_BOUNDARY, OP_EODN, OP_EOD, OP_DOLL, OP_DOLLM, OP_CIRC,
OP_CIRCM or OP_REVERSE.

Arguments:
  common      compiler state shared by the code generators
  type        the assertion opcode to generate code for
  cc          position in the compiled pattern; only OP_REVERSE reads from it
  backtracks  list receiving every jump that is taken when the assertion fails

Returns:      cc + LINK_SIZE for OP_REVERSE, otherwise cc unchanged
*/
static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
{
  DEFINE_COMPILER;
  int length;
  int fail_cond;
  /* The two code units compared at offsets 0 and 1 (or -2 and -1) when a
  fixed newline is stored as a value above 255. */
  unsigned int nl_first, nl_second;
  struct sljit_jump *done;
  struct sljit_jump *skip;
  struct sljit_jump *cr_last;
  struct sljit_jump *crlf_last;
#ifdef SUPPORT_UNICODE
  struct sljit_label *loop;
#endif /* SUPPORT_UNICODE */

  nl_first = (common->newline >> 8) & 0xff;
  nl_second = common->newline & 0xff;

  switch (type)
  {
    case OP_SOD:
    case OP_SOM:
      /* Fail unless STR_PTR equals args->begin (OP_SOD) or args->str (OP_SOM). */
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
      if (type == OP_SOD)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
      else
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
      return cc;

    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
      /* The shared word-boundary routine is called first; its outcome is then
      tested through the zero flag. */
      fail_cond = (type == OP_WORD_BOUNDARY) ? SLJIT_ZERO : SLJIT_NOT_ZERO;
      add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
#ifdef SUPPORT_UNICODE
      if (common->invalid_utf)
      {
        /* A negative TMP2 after the call always backtracks. */
        OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_SIG_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
        add_jump(compiler, backtracks, JUMP(SLJIT_SIG_LESS));
      }
      else
#endif /* SUPPORT_UNICODE */
        sljit_set_current_flags(compiler, SLJIT_SET_Z);
      add_jump(compiler, backtracks, JUMP(fail_cond));
      return cc;

    case OP_EODN:
      /* Being at (or past) STR_END needs no newline check at all. */
      done = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

      if (common->nltype != NLTYPE_FIXED)
      {
        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
        skip = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);

        /* The current unit is CR: compare STR_PTR + 2 units with STR_END. */
        OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
        OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
        cr_last = JUMP(SLJIT_GREATER);
        add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
        /* Equal: the unit after CR is the last one and has to be NL. */
        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
        crlf_last = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
        add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

        JUMPHERE(skip);
        if (common->nltype != NLTYPE_ANYCRLF)
        {
          /* STR_PTR is saved in TMP3 and restored once the check passed. */
          OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
          read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
          add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
          add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
          sljit_set_current_flags(compiler, SLJIT_SET_Z);
          add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
          OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
        }
        else
        {
          OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
          add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
          add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
        }
        JUMPHERE(cr_last);
        JUMPHERE(crlf_last);
      }
      else if (common->newline > 255)
      {
        /* Fixed newline made of two code units. */
        OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
        if (common->mode != PCRE2_JIT_COMPLETE)
        {
          skip = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
          OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
          OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, nl_first);
          OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
          add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
          check_partial(common, TRUE);
          add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
          JUMPHERE(skip);
        }
        else
          add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));

        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
        add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
        add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_second));
      }
      else
      {
        /* Fixed newline made of a single code unit. */
        OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
        add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
        add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
      }

      JUMPHERE(done);
      check_partial(common, FALSE);
      return cc;

    case OP_EOD:
      skip = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
      add_jump(compiler, backtracks, skip);
      check_partial(common, FALSE);
      return cc;

    case OP_DOLL:
      /* PCRE2_NOTEOL in the match options makes the assertion fail. */
      OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
      OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));

      if (common->endonly)
      {
        add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
        check_partial(common, FALSE);
      }
      else
        compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
      return cc;

    case OP_DOLLM:
      /* At the end of the subject only PCRE2_NOTEOL can make this fail;
      anywhere else a newline has to follow. */
      skip = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
      OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
      OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
      check_partial(common, FALSE);
      done = JUMP(SLJIT_JUMP);
      JUMPHERE(skip);

      if (common->nltype != NLTYPE_FIXED || common->newline <= 255)
      {
        peek_char(common, common->nlmax, TMP3, 0, NULL);
        check_newlinechar(common, common->nltype, backtracks, FALSE);
      }
      else
      {
        OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
        if (common->mode != PCRE2_JIT_COMPLETE)
        {
          skip = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
          /* STR_PTR = STR_END - IN_UCHARS(1) */
          add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
          check_partial(common, TRUE);
          add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
          JUMPHERE(skip);
        }
        else
          add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));

        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
        add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
        add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_second));
      }
      JUMPHERE(done);
      return cc;

    case OP_CIRC:
      /* Fail when STR_PTR is beyond args->begin or PCRE2_NOTBOL is set. */
      OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
      OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
      return cc;

    case OP_CIRCM:
      /* At args->begin only PCRE2_NOTBOL can make this fail; anywhere else a
      newline has to precede STR_PTR. TMP2 keeps args->begin for the
      two-unit newline bound check below. */
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
      skip = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
      OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
      done = JUMP(SLJIT_JUMP);
      JUMPHERE(skip);

      if (!common->alt_circumflex)
        add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

      if (common->nltype != NLTYPE_FIXED || common->newline <= 255)
      {
        peek_char_back(common, common->nlmax, backtracks);
        check_newlinechar(common, common->nltype, backtracks, FALSE);
      }
      else
      {
        OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
        add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
        add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
        add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_second));
      }
      JUMPHERE(done);
      return cc;

    case OP_REVERSE:
      length = GET(cc, 0);
      if (length != 0)
      {
        OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
#ifdef SUPPORT_UNICODE
        if (common->utf)
        {
          /* Step back one character at a time, TMP3 counting down from
          length; reaching args->begin too early backtracks. */
          OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
          OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, length);
          loop = LABEL();
          add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
          move_back(common, backtracks, FALSE);
          OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
          JUMPTO(SLJIT_NOT_ZERO, loop);
        }
        else
#endif /* SUPPORT_UNICODE */
        {
          /* Fixed-width units: move back by length units in one step. */
          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
          OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
          add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
        }
        check_start_used_ptr(common);
      }
      return cc + LINK_SIZE;
  }

  SLJIT_UNREACHABLE();
  return cc;
}
8395
8396 #ifdef SUPPORT_UNICODE
8397
8398 #if PCRE2_CODE_UNIT_WIDTH != 32
8399
do_extuni_utf(jit_arguments * args,PCRE2_SPTR cc)8400 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
8401 {
8402 PCRE2_SPTR start_subject = args->begin;
8403 PCRE2_SPTR end_subject = args->end;
8404 int lgb, rgb, ricount;
8405 PCRE2_SPTR prevcc, startcc, bptr;
8406 BOOL first = TRUE;
8407 uint32_t c;
8408
8409 prevcc = cc;
8410 startcc = NULL;
8411 do
8412 {
8413 GETCHARINC(c, cc);
8414 rgb = UCD_GRAPHBREAK(c);
8415
8416 if (first)
8417 {
8418 lgb = rgb;
8419 startcc = cc;
8420 first = FALSE;
8421 continue;
8422 }
8423
8424 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8425 break;
8426
8427 /* Not breaking between Regional Indicators is allowed only if there
8428 are an even number of preceding RIs. */
8429
8430 if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8431 {
8432 ricount = 0;
8433 bptr = prevcc;
8434
8435 /* bptr is pointing to the left-hand character */
8436 while (bptr > start_subject)
8437 {
8438 bptr--;
8439 BACKCHAR(bptr);
8440 GETCHAR(c, bptr);
8441
8442 if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
8443 break;
8444
8445 ricount++;
8446 }
8447
8448 if ((ricount & 1) != 0) break; /* Grapheme break required */
8449 }
8450
8451 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8452 allows any number of them before a following Extended_Pictographic. */
8453
8454 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8455 lgb != ucp_gbExtended_Pictographic)
8456 lgb = rgb;
8457
8458 prevcc = startcc;
8459 startcc = cc;
8460 }
8461 while (cc < end_subject);
8462
8463 return startcc;
8464 }
8465
do_extuni_utf_invalid(jit_arguments * args,PCRE2_SPTR cc)8466 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
8467 {
8468 PCRE2_SPTR start_subject = args->begin;
8469 PCRE2_SPTR end_subject = args->end;
8470 int lgb, rgb, ricount;
8471 PCRE2_SPTR prevcc, startcc, bptr;
8472 BOOL first = TRUE;
8473 uint32_t c;
8474
8475 prevcc = cc;
8476 startcc = NULL;
8477 do
8478 {
8479 GETCHARINC_INVALID(c, cc, end_subject, break);
8480 rgb = UCD_GRAPHBREAK(c);
8481
8482 if (first)
8483 {
8484 lgb = rgb;
8485 startcc = cc;
8486 first = FALSE;
8487 continue;
8488 }
8489
8490 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8491 break;
8492
8493 /* Not breaking between Regional Indicators is allowed only if there
8494 are an even number of preceding RIs. */
8495
8496 if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8497 {
8498 ricount = 0;
8499 bptr = prevcc;
8500
8501 /* bptr is pointing to the left-hand character */
8502 while (bptr > start_subject)
8503 {
8504 GETCHARBACK_INVALID(c, bptr, start_subject, break);
8505
8506 if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
8507 break;
8508
8509 ricount++;
8510 }
8511
8512 if ((ricount & 1) != 0)
8513 break; /* Grapheme break required */
8514 }
8515
8516 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8517 allows any number of them before a following Extended_Pictographic. */
8518
8519 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8520 lgb != ucp_gbExtended_Pictographic)
8521 lgb = rgb;
8522
8523 prevcc = startcc;
8524 startcc = cc;
8525 }
8526 while (cc < end_subject);
8527
8528 return startcc;
8529 }
8530
8531 #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
8532
do_extuni_no_utf(jit_arguments * args,PCRE2_SPTR cc)8533 static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
8534 {
8535 PCRE2_SPTR start_subject = args->begin;
8536 PCRE2_SPTR end_subject = args->end;
8537 int lgb, rgb, ricount;
8538 PCRE2_SPTR bptr;
8539 uint32_t c;
8540
8541 GETCHARINC(c, cc);
8542 #if PCRE2_CODE_UNIT_WIDTH == 32
8543 if (c >= 0x110000)
8544 return NULL;
8545 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8546 lgb = UCD_GRAPHBREAK(c);
8547
8548 while (cc < end_subject)
8549 {
8550 c = *cc;
8551 #if PCRE2_CODE_UNIT_WIDTH == 32
8552 if (c >= 0x110000)
8553 break;
8554 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8555 rgb = UCD_GRAPHBREAK(c);
8556
8557 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8558 break;
8559
8560 /* Not breaking between Regional Indicators is allowed only if there
8561 are an even number of preceding RIs. */
8562
8563 if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8564 {
8565 ricount = 0;
8566 bptr = cc - 1;
8567
8568 /* bptr is pointing to the left-hand character */
8569 while (bptr > start_subject)
8570 {
8571 bptr--;
8572 c = *bptr;
8573 #if PCRE2_CODE_UNIT_WIDTH == 32
8574 if (c >= 0x110000)
8575 break;
8576 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8577
8578 if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
8579
8580 ricount++;
8581 }
8582
8583 if ((ricount & 1) != 0)
8584 break; /* Grapheme break required */
8585 }
8586
8587 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8588 allows any number of them before a following Extended_Pictographic. */
8589
8590 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8591 lgb != ucp_gbExtended_Pictographic)
8592 lgb = rgb;
8593
8594 cc++;
8595 }
8596
8597 return cc;
8598 }
8599
8600 #endif /* SUPPORT_UNICODE */
8601
compile_char1_matchingpath(compiler_common * common,PCRE2_UCHAR type,PCRE2_SPTR cc,jump_list ** backtracks,BOOL check_str_ptr)8602 static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
8603 {
8604 DEFINE_COMPILER;
8605 int length;
8606 unsigned int c, oc, bit;
8607 compare_context context;
8608 struct sljit_jump *jump[3];
8609 jump_list *end_list;
8610 #ifdef SUPPORT_UNICODE
8611 PCRE2_UCHAR propdata[5];
8612 #endif /* SUPPORT_UNICODE */
8613
8614 switch(type)
8615 {
8616 case OP_NOT_DIGIT:
8617 case OP_DIGIT:
8618 /* Digits are usually 0-9, so it is worth to optimize them. */
8619 if (check_str_ptr)
8620 detect_partial_match(common, backtracks);
8621 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8622 if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
8623 read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
8624 else
8625 #endif
8626 read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
8627 /* Flip the starting bit in the negative case. */
8628 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
8629 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8630 return cc;
8631
8632 case OP_NOT_WHITESPACE:
8633 case OP_WHITESPACE:
8634 if (check_str_ptr)
8635 detect_partial_match(common, backtracks);
8636 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8637 if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
8638 read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
8639 else
8640 #endif
8641 read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
8642 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
8643 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8644 return cc;
8645
8646 case OP_NOT_WORDCHAR:
8647 case OP_WORDCHAR:
8648 if (check_str_ptr)
8649 detect_partial_match(common, backtracks);
8650 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8651 if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
8652 read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
8653 else
8654 #endif
8655 read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
8656 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
8657 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8658 return cc;
8659
8660 case OP_ANY:
8661 if (check_str_ptr)
8662 detect_partial_match(common, backtracks);
8663 read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
8664 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8665 {
8666 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
8667 end_list = NULL;
8668 if (common->mode != PCRE2_JIT_PARTIAL_HARD)
8669 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8670 else
8671 check_str_end(common, &end_list);
8672
8673 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8674 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
8675 set_jumps(end_list, LABEL());
8676 JUMPHERE(jump[0]);
8677 }
8678 else
8679 check_newlinechar(common, common->nltype, backtracks, TRUE);
8680 return cc;
8681
8682 case OP_ALLANY:
8683 if (check_str_ptr)
8684 detect_partial_match(common, backtracks);
8685 #ifdef SUPPORT_UNICODE
8686 if (common->utf)
8687 {
8688 if (common->invalid_utf)
8689 {
8690 read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
8691 return cc;
8692 }
8693
8694 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
8695 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8696 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8697 #if PCRE2_CODE_UNIT_WIDTH == 8
8698 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
8699 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
8700 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
8701 #elif PCRE2_CODE_UNIT_WIDTH == 16
8702 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
8703 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
8704 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
8705 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
8706 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
8707 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
8708 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
8709 JUMPHERE(jump[0]);
8710 return cc;
8711 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
8712 }
8713 #endif /* SUPPORT_UNICODE */
8714 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8715 return cc;
8716
8717 case OP_ANYBYTE:
8718 if (check_str_ptr)
8719 detect_partial_match(common, backtracks);
8720 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8721 return cc;
8722
8723 #ifdef SUPPORT_UNICODE
8724 case OP_NOTPROP:
8725 case OP_PROP:
8726 propdata[0] = XCL_HASPROP;
8727 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
8728 propdata[2] = cc[0];
8729 propdata[3] = cc[1];
8730 propdata[4] = XCL_END;
8731 if (check_str_ptr)
8732 detect_partial_match(common, backtracks);
8733 compile_xclass_matchingpath(common, propdata, backtracks);
8734 return cc + 2;
8735 #endif
8736
8737 case OP_ANYNL:
8738 if (check_str_ptr)
8739 detect_partial_match(common, backtracks);
8740 read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
8741 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
8742 /* We don't need to handle soft partial matching case. */
8743 end_list = NULL;
8744 if (common->mode != PCRE2_JIT_PARTIAL_HARD)
8745 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8746 else
8747 check_str_end(common, &end_list);
8748 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8749 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
8750 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8751 jump[2] = JUMP(SLJIT_JUMP);
8752 JUMPHERE(jump[0]);
8753 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
8754 set_jumps(end_list, LABEL());
8755 JUMPHERE(jump[1]);
8756 JUMPHERE(jump[2]);
8757 return cc;
8758
8759 case OP_NOT_HSPACE:
8760 case OP_HSPACE:
8761 if (check_str_ptr)
8762 detect_partial_match(common, backtracks);
8763
8764 if (type == OP_NOT_HSPACE)
8765 read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
8766 else
8767 read_char(common, 0x9, 0x3000, NULL, 0);
8768
8769 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
8770 sljit_set_current_flags(compiler, SLJIT_SET_Z);
8771 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8772 return cc;
8773
8774 case OP_NOT_VSPACE:
8775 case OP_VSPACE:
8776 if (check_str_ptr)
8777 detect_partial_match(common, backtracks);
8778
8779 if (type == OP_NOT_VSPACE)
8780 read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
8781 else
8782 read_char(common, 0xa, 0x2029, NULL, 0);
8783
8784 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
8785 sljit_set_current_flags(compiler, SLJIT_SET_Z);
8786 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8787 return cc;
8788
8789 #ifdef SUPPORT_UNICODE
8790 case OP_EXTUNI:
8791 if (check_str_ptr)
8792 detect_partial_match(common, backtracks);
8793
8794 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
8795 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
8796
8797 #if PCRE2_CODE_UNIT_WIDTH != 32
8798 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
8799 common->utf ? (common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_utf)) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
8800 if (common->invalid_utf)
8801 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
8802 #else
8803 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_extuni_no_utf));
8804 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
8805 #endif
8806
8807 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
8808
8809 if (common->mode == PCRE2_JIT_PARTIAL_HARD)
8810 {
8811 jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
8812 /* Since we successfully read a char above, partial matching must occure. */
8813 check_partial(common, TRUE);
8814 JUMPHERE(jump[0]);
8815 }
8816 return cc;
8817 #endif
8818
8819 case OP_CHAR:
8820 case OP_CHARI:
8821 length = 1;
8822 #ifdef SUPPORT_UNICODE
8823 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
8824 #endif
8825
8826 if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
8827 detect_partial_match(common, backtracks);
8828
8829 if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
8830 {
8831 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
8832 if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
8833 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
8834
8835 context.length = IN_UCHARS(length);
8836 context.sourcereg = -1;
8837 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
8838 context.ucharptr = 0;
8839 #endif
8840 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
8841 }
8842
8843 #ifdef SUPPORT_UNICODE
8844 if (common->utf)
8845 {
8846 GETCHAR(c, cc);
8847 }
8848 else
8849 #endif
8850 c = *cc;
8851
8852 SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));
8853
8854 if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
8855 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8856
8857 oc = char_othercase(common, c);
8858 read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);
8859
8860 SLJIT_ASSERT(!is_powerof2(c ^ oc));
8861
8862 if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
8863 {
8864 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
8865 CMOV(SLJIT_EQUAL, TMP1, SLJIT_IMM, c);
8866 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8867 }
8868 else
8869 {
8870 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
8871 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
8872 JUMPHERE(jump[0]);
8873 }
8874 return cc + length;
8875
8876 case OP_NOT:
8877 case OP_NOTI:
8878 if (check_str_ptr)
8879 detect_partial_match(common, backtracks);
8880
8881 length = 1;
8882 #ifdef SUPPORT_UNICODE
8883 if (common->utf)
8884 {
8885 #if PCRE2_CODE_UNIT_WIDTH == 8
8886 c = *cc;
8887 if (c < 128 && !common->invalid_utf)
8888 {
8889 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8890 if (type == OP_NOT || !char_has_othercase(common, cc))
8891 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8892 else
8893 {
8894 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
8895 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
8896 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
8897 }
8898 /* Skip the variable-length character. */
8899 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8900 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
8901 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
8902 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
8903 JUMPHERE(jump[0]);
8904 return cc + 1;
8905 }
8906 else
8907 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
8908 {
8909 GETCHARLEN(c, cc, length);
8910 }
8911 }
8912 else
8913 #endif /* SUPPORT_UNICODE */
8914 c = *cc;
8915
8916 if (type == OP_NOT || !char_has_othercase(common, cc))
8917 {
8918 read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
8919 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8920 }
8921 else
8922 {
8923 oc = char_othercase(common, c);
8924 read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
8925 bit = c ^ oc;
8926 if (is_powerof2(bit))
8927 {
8928 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
8929 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
8930 }
8931 else
8932 {
8933 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8934 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
8935 }
8936 }
8937 return cc + length;
8938
8939 case OP_CLASS:
8940 case OP_NCLASS:
8941 if (check_str_ptr)
8942 detect_partial_match(common, backtracks);
8943
8944 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8945 bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
8946 if (type == OP_NCLASS)
8947 read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
8948 else
8949 read_char(common, 0, bit, NULL, 0);
8950 #else
8951 if (type == OP_NCLASS)
8952 read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
8953 else
8954 read_char(common, 0, 255, NULL, 0);
8955 #endif
8956
8957 if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
8958 return cc + 32 / sizeof(PCRE2_UCHAR);
8959
8960 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8961 jump[0] = NULL;
8962 if (common->utf)
8963 {
8964 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
8965 if (type == OP_CLASS)
8966 {
8967 add_jump(compiler, backtracks, jump[0]);
8968 jump[0] = NULL;
8969 }
8970 }
8971 #elif PCRE2_CODE_UNIT_WIDTH != 8
8972 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
8973 if (type == OP_CLASS)
8974 {
8975 add_jump(compiler, backtracks, jump[0]);
8976 jump[0] = NULL;
8977 }
8978 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
8979
8980 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
8981 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
8982 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
8983 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
8984 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
8985 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
8986
8987 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
8988 if (jump[0] != NULL)
8989 JUMPHERE(jump[0]);
8990 #endif
8991 return cc + 32 / sizeof(PCRE2_UCHAR);
8992
8993 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
8994 case OP_XCLASS:
8995 if (check_str_ptr)
8996 detect_partial_match(common, backtracks);
8997 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
8998 return cc + GET(cc, 0) - 1;
8999 #endif
9000 }
9001 SLJIT_UNREACHABLE();
9002 return cc;
9003 }
9004
compile_charn_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,jump_list ** backtracks)9005 static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
9006 {
9007 /* This function consumes at least one input character. */
9008 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
9009 DEFINE_COMPILER;
9010 PCRE2_SPTR ccbegin = cc;
9011 compare_context context;
9012 int size;
9013
9014 context.length = 0;
9015 do
9016 {
9017 if (cc >= ccend)
9018 break;
9019
9020 if (*cc == OP_CHAR)
9021 {
9022 size = 1;
9023 #ifdef SUPPORT_UNICODE
9024 if (common->utf && HAS_EXTRALEN(cc[1]))
9025 size += GET_EXTRALEN(cc[1]);
9026 #endif
9027 }
9028 else if (*cc == OP_CHARI)
9029 {
9030 size = 1;
9031 #ifdef SUPPORT_UNICODE
9032 if (common->utf)
9033 {
9034 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9035 size = 0;
9036 else if (HAS_EXTRALEN(cc[1]))
9037 size += GET_EXTRALEN(cc[1]);
9038 }
9039 else
9040 #endif
9041 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9042 size = 0;
9043 }
9044 else
9045 size = 0;
9046
9047 cc += 1 + size;
9048 context.length += IN_UCHARS(size);
9049 }
9050 while (size > 0 && context.length <= 128);
9051
9052 cc = ccbegin;
9053 if (context.length > 0)
9054 {
9055 /* We have a fixed-length byte sequence. */
9056 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
9057 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
9058
9059 context.sourcereg = -1;
9060 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
9061 context.ucharptr = 0;
9062 #endif
9063 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
9064 return cc;
9065 }
9066
9067 /* A non-fixed length character will be checked if length == 0. */
9068 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
9069 }
9070
9071 /* Forward definitions. */
9072 static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
9073 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
9074
9075 #define PUSH_BACKTRACK(size, ccstart, error) \
9076 do \
9077 { \
9078 backtrack = sljit_alloc_memory(compiler, (size)); \
9079 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
9080 return error; \
9081 memset(backtrack, 0, size); \
9082 backtrack->prev = parent->top; \
9083 backtrack->cc = (ccstart); \
9084 parent->top = backtrack; \
9085 } \
9086 while (0)
9087
9088 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
9089 do \
9090 { \
9091 backtrack = sljit_alloc_memory(compiler, (size)); \
9092 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
9093 return; \
9094 memset(backtrack, 0, size); \
9095 backtrack->prev = parent->top; \
9096 backtrack->cc = (ccstart); \
9097 parent->top = backtrack; \
9098 } \
9099 while (0)
9100
9101 #define BACKTRACK_AS(type) ((type *)backtrack)
9102
/* Emits the code which selects the group to be used by a duplicate-name back
reference (OP_DNREF / OP_DNREFI). The name table entries belonging to the name
are tested in order, and the first one whose OVECTOR start differs from
OVECTOR(1) is chosen. If none does, the last entry is used.

On exit of the generated code TMP2 holds the address of the selected OVECTOR
slot. When 'backtracks' is not NULL and unset back references are not allowed
to match, a failing jump is added for the case where the last entry is unset
as well. */
static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int entries = GET2(cc, 1 + IMM2_SIZE);
PCRE2_SPTR entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
unsigned int offset;
jump_list *found = NULL;
int i;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

/* TMP1 is the value every start slot is compared against. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* Every entry except the last one: leave as soon as its start differs. */
for (i = 1; i < entries; i++)
  {
  offset = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
  add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
  entry += common->name_entry_size;
  }

/* The last entry is the fallback, so TMP2 is always set. */
offset = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
if (backtracks != NULL && !common->unset_backref)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));

set_jumps(found, LABEL());
}
9132
/* Emits the code which matches the subject at STR_PTR against the text
captured by a back reference.

cc          points to OP_REF, OP_REFI, OP_DNREF or OP_DNREFI. For the two
            duplicate-name opcodes TMP2 must already hold the address of the
            selected OVECTOR slot (compile_dnref_search() leaves it there).
backtracks  list which receives the jumps taken when the match fails.
withchecks  when TRUE, a numbered reference to an unset group fails (unless
            common->unset_backref is set) and an empty reference is detected.
emptyfail   only used together with withchecks: when TRUE an empty reference
            fails, otherwise it simply matches.

Fix: the comparison helper used to be chosen with "*cc == OP_REF", which sent
the caseful OP_DNREF to the caseless helper. OP_DNREF now uses the caseful
helper, exactly like OP_REF. */
static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;
#if defined SUPPORT_UNICODE
struct sljit_label *loop;
struct sljit_label *caseless_loop;
jump_list *no_match = NULL;
int source_reg = COUNT_MATCH;
int source_end_reg = ARGUMENTS;
int char1_reg = STACK_LIMIT;
#endif /* SUPPORT_UNICODE */

/* TMP1 = start of the captured text. */
if (ref)
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->unset_backref)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
else
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UNICODE
if (common->utf && *cc == OP_REFI)
  {
  /* Caseless UTF comparison: both strings are decoded character by
  character. Three registers are borrowed for the loop; their values are
  saved in the three words starting at common->iref_ptr and reloaded on every
  exit path below. */
  SLJIT_ASSERT(common->iref_ptr != 0);

  /* TMP2 = end of the captured text. Because *cc == OP_REFI here, 'ref' is
  always TRUE and the else arm is not reached. */
  if (ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  if (withchecks && emptyfail)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));

  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0);

  OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
  OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);

  /* One iteration per character. 'jump' leaves the loop when the whole
  captured text has been consumed (success), 'partial' when the subject ends
  first. */
  loop = LABEL();
  jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
  partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

  /* Read original character. It must be a valid UTF character. STR_PTR is
  parked in TMP3 while read_char() runs on the captured text. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);

  read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);

  OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);

  /* Read second character (from the subject). */
  read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);

  /* Identical characters: continue with the next pair. */
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

  /* Keep the subject character in TMP3 and look up its UCD record. After the
  getucd call, (TMP2 << 2) + (TMP2 << 3), i.e. TMP2 * 12, is used as the byte
  offset of the record inside ucd_records. */
  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);

  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));

  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));

  /* The subject character plus its other_case delta is compared with the
  captured character. */
  OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

  /* A zero caseset means there is nothing else to try. Otherwise the entries
  of ucd_caseless_sets starting at index 'caseset' are scanned. */
  add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));

  /* The scan continues while the entry is smaller than the captured
  character, succeeds on equality and falls through to no_match otherwise. */
  caseless_loop = LABEL();
  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
  OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, char1_reg, 0);
  JUMPTO(SLJIT_EQUAL, loop);
  JUMPTO(SLJIT_LESS, caseless_loop);

  /* Failure exit. In complete mode running out of subject is a plain
  failure as well. */
  set_jumps(no_match, LABEL());
  if (common->mode == PCRE2_JIT_COMPLETE)
    JUMPHERE(partial);

  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

  /* Partial matching modes: the subject ended inside the reference. */
  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    JUMPHERE(partial);
    OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
    OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);

    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    }

  /* Success exit: the whole captured text has been matched. */
  JUMPHERE(jump);
  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  return;
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* Both caseful opcodes (OP_REF and OP_DNREF) use the caseful helper, the
  remaining ones the caseless helper.
  NOTE(review): a caseless OP_DNREFI in UTF mode also ends up here rather than
  in the character based loop above - confirm that this is intended. */
  jump_list **compare_helper = (*cc == OP_REF || *cc == OP_DNREF) ?
    &common->casefulcmp : &common->caselesscmp;

  /* TMP2 = length of the captured text (end - start); the zero flag is set
  for an empty reference. */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  if (withchecks)
    jump = JUMP(SLJIT_ZERO);

  /* Advance first, then check that the subject is long enough. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, partial);

  /* The match fails when the helper leaves a non-zero value in TMP2. */
  add_jump(compiler, compare_helper, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* The subject is shorter than the reference: compare only the part that
    is available and report a partial match if it agrees. */
    nopartial = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* TMP2 -= STR_END - STR_PTR */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, compare_helper, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }

/* Empty reference (only detected when withchecks is set): either a failure
or an immediate success, depending on emptyfail. */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
}
9302
compile_ref_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9303 static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9304 {
9305 DEFINE_COMPILER;
9306 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9307 backtrack_common *backtrack;
9308 PCRE2_UCHAR type;
9309 int offset = 0;
9310 struct sljit_label *label;
9311 struct sljit_jump *zerolength;
9312 struct sljit_jump *jump = NULL;
9313 PCRE2_SPTR ccbegin = cc;
9314 int min = 0, max = 0;
9315 BOOL minimize;
9316
9317 PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
9318
9319 if (ref)
9320 offset = GET2(cc, 1) << 1;
9321 else
9322 cc += IMM2_SIZE;
9323 type = cc[1 + IMM2_SIZE];
9324
9325 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
9326 minimize = (type & 0x1) != 0;
9327 switch(type)
9328 {
9329 case OP_CRSTAR:
9330 case OP_CRMINSTAR:
9331 min = 0;
9332 max = 0;
9333 cc += 1 + IMM2_SIZE + 1;
9334 break;
9335 case OP_CRPLUS:
9336 case OP_CRMINPLUS:
9337 min = 1;
9338 max = 0;
9339 cc += 1 + IMM2_SIZE + 1;
9340 break;
9341 case OP_CRQUERY:
9342 case OP_CRMINQUERY:
9343 min = 0;
9344 max = 1;
9345 cc += 1 + IMM2_SIZE + 1;
9346 break;
9347 case OP_CRRANGE:
9348 case OP_CRMINRANGE:
9349 min = GET2(cc, 1 + IMM2_SIZE + 1);
9350 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
9351 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
9352 break;
9353 default:
9354 SLJIT_UNREACHABLE();
9355 break;
9356 }
9357
9358 if (!minimize)
9359 {
9360 if (min == 0)
9361 {
9362 allocate_stack(common, 2);
9363 if (ref)
9364 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9365 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9366 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9367 /* Temporary release of STR_PTR. */
9368 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9369 /* Handles both invalid and empty cases. Since the minimum repeat,
9370 is zero the invalid case is basically the same as an empty case. */
9371 if (ref)
9372 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9373 else
9374 {
9375 compile_dnref_search(common, ccbegin, NULL);
9376 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9377 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9378 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9379 }
9380 /* Restore if not zero length. */
9381 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9382 }
9383 else
9384 {
9385 allocate_stack(common, 1);
9386 if (ref)
9387 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9388 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9389 if (ref)
9390 {
9391 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9392 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9393 }
9394 else
9395 {
9396 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
9397 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9398 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9399 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9400 }
9401 }
9402
9403 if (min > 1 || max > 1)
9404 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
9405
9406 label = LABEL();
9407 if (!ref)
9408 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
9409 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
9410
9411 if (min > 1 || max > 1)
9412 {
9413 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
9414 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9415 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
9416 if (min > 1)
9417 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
9418 if (max > 1)
9419 {
9420 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
9421 allocate_stack(common, 1);
9422 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9423 JUMPTO(SLJIT_JUMP, label);
9424 JUMPHERE(jump);
9425 }
9426 }
9427
9428 if (max == 0)
9429 {
9430 /* Includes min > 1 case as well. */
9431 allocate_stack(common, 1);
9432 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9433 JUMPTO(SLJIT_JUMP, label);
9434 }
9435
9436 JUMPHERE(zerolength);
9437 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9438
9439 count_match(common);
9440 return cc;
9441 }
9442
9443 allocate_stack(common, ref ? 2 : 3);
9444 if (ref)
9445 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9446 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9447 if (type != OP_CRMINSTAR)
9448 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9449
9450 if (min == 0)
9451 {
9452 /* Handles both invalid and empty cases. Since the minimum repeat,
9453 is zero the invalid case is basically the same as an empty case. */
9454 if (ref)
9455 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9456 else
9457 {
9458 compile_dnref_search(common, ccbegin, NULL);
9459 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9460 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9461 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9462 }
9463 /* Length is non-zero, we can match real repeats. */
9464 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9465 jump = JUMP(SLJIT_JUMP);
9466 }
9467 else
9468 {
9469 if (ref)
9470 {
9471 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9472 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9473 }
9474 else
9475 {
9476 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
9477 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9478 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9479 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9480 }
9481 }
9482
9483 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9484 if (max > 0)
9485 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
9486
9487 if (!ref)
9488 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9489 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
9490 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9491
9492 if (min > 1)
9493 {
9494 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9495 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9496 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9497 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
9498 }
9499 else if (max > 0)
9500 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
9501
9502 if (jump != NULL)
9503 JUMPHERE(jump);
9504 JUMPHERE(zerolength);
9505
9506 count_match(common);
9507 return cc;
9508 }
9509
compile_recurse_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9510 static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9511 {
9512 DEFINE_COMPILER;
9513 backtrack_common *backtrack;
9514 recurse_entry *entry = common->entries;
9515 recurse_entry *prev = NULL;
9516 sljit_sw start = GET(cc, 1);
9517 PCRE2_SPTR start_cc;
9518 BOOL needs_control_head;
9519
9520 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
9521
9522 /* Inlining simple patterns. */
9523 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
9524 {
9525 start_cc = common->start + start;
9526 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
9527 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
9528 return cc + 1 + LINK_SIZE;
9529 }
9530
9531 while (entry != NULL)
9532 {
9533 if (entry->start == start)
9534 break;
9535 prev = entry;
9536 entry = entry->next;
9537 }
9538
9539 if (entry == NULL)
9540 {
9541 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
9542 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9543 return NULL;
9544 entry->next = NULL;
9545 entry->entry_label = NULL;
9546 entry->backtrack_label = NULL;
9547 entry->entry_calls = NULL;
9548 entry->backtrack_calls = NULL;
9549 entry->start = start;
9550
9551 if (prev != NULL)
9552 prev->next = entry;
9553 else
9554 common->entries = entry;
9555 }
9556
9557 BACKTRACK_AS(recurse_backtrack)->entry = entry;
9558
9559 if (entry->entry_label == NULL)
9560 add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));
9561 else
9562 JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
9563 /* Leave if the match is failed. */
9564 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
9565 BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
9566 return cc + 1 + LINK_SIZE;
9567 }
9568
do_callout(struct jit_arguments * arguments,pcre2_callout_block * callout_block,PCRE2_SPTR * jit_ovector)9569 static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
9570 {
9571 PCRE2_SPTR begin;
9572 PCRE2_SIZE *ovector;
9573 sljit_u32 oveccount, capture_top;
9574
9575 if (arguments->callout == NULL)
9576 return 0;
9577
9578 SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);
9579
9580 begin = arguments->begin;
9581 ovector = (PCRE2_SIZE*)(callout_block + 1);
9582 oveccount = callout_block->capture_top;
9583
9584 SLJIT_ASSERT(oveccount >= 1);
9585
9586 callout_block->version = 2;
9587 callout_block->callout_flags = 0;
9588
9589 /* Offsets in subject. */
9590 callout_block->subject_length = arguments->end - arguments->begin;
9591 callout_block->start_match = jit_ovector[0] - begin;
9592 callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
9593 callout_block->subject = begin;
9594
9595 /* Convert and copy the JIT offset vector to the ovector array. */
9596 callout_block->capture_top = 1;
9597 callout_block->offset_vector = ovector;
9598
9599 ovector[0] = PCRE2_UNSET;
9600 ovector[1] = PCRE2_UNSET;
9601 ovector += 2;
9602 jit_ovector += 2;
9603 capture_top = 1;
9604
9605 /* Convert pointers to sizes. */
9606 while (--oveccount != 0)
9607 {
9608 capture_top++;
9609
9610 ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin);
9611 ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin);
9612
9613 if (ovector[0] != PCRE2_UNSET)
9614 callout_block->capture_top = capture_top;
9615
9616 ovector += 2;
9617 jit_ovector += 2;
9618 }
9619
9620 return (arguments->callout)(callout_block, arguments->callout_data);
9621 }
9622
/* Byte offset of the field 'arg' inside pcre2_callout_block. */
#define CALLOUT_ARG_OFFSET(arg) SLJIT_OFFSETOF(pcre2_callout_block, arg)
9625
/* Emits the JIT code for a callout item located at cc (OP_CALLOUT, or the
string form whose length is stored in the item itself).

The generated code reserves stack space for a pcre2_callout_block plus
(top_bracket + 1) * 2 extra machine words, fills in the fields that are known
at compile time, calls do_callout() and finally dispatches on its return
value:
  > 0  jump to this item's backtrack list,
  < 0  jump to the abort label / abort list,
  = 0  fall through and continue matching.

Arguments:
  common   compiler state
  cc       points to the callout opcode
  parent   parent backtrack (not referenced directly in this body;
           presumably consumed by PUSH_BACKTRACK - confirm)

Returns:   the address of the item following the callout */
static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
sljit_s32 mov_opcode;
/* Numeric callouts have a fixed length; string callouts store their own. */
unsigned int callout_length = (*cc == OP_CALLOUT)
    ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
sljit_sw value1;
sljit_sw value2;
sljit_sw value3;
/* Two machine words for every capture pair, including pair 0. */
sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw);

PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

/* Convert the total byte size (block + extra words) to a number of machine
words, rounding up. */
callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);

allocate_stack(common, callout_arg_size);

SLJIT_ASSERT(common->capture_last_ptr != 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
/* The callout number is only present in the numeric form; 0 otherwise. */
value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);

/* These pointer-sized fields temporarily store internal variables. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);

/* Load the current mark from the jit_arguments structure (TMP1 holds
ARGUMENTS); it is stored into the block further below. */
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
/* PCRE2_SIZE fields are written with a 32-bit or a word-sized move,
depending on the size of the type. */
mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));

if (*cc == OP_CALLOUT)
  {
  /* Numeric callout: no string, zero length, zero offset. */
  value1 = 0;
  value2 = 0;
  value3 = 0;
  }
else
  {
  /* String callout: address of the string inside the compiled pattern, its
  length derived from the item length, and the offset stored in the item. */
  value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
  value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
  value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
  }

OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
/* Store the mark loaded above, or an immediate 0 when marks are not used. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);

SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

/* Needed to save important temporary registers: STR_PTR lives in SLJIT_R1,
which is overwritten with the second call argument below. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
/* SLJIT_R0 = arguments */
OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
free_stack(common, callout_arg_size);

/* Check return value: a single 32-bit compare against zero sets both the
zero and the signed-greater flags. */
OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
/* Positive: backtrack. */
add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER32));
/* Positive values were handled above, so "not equal" means negative here:
abort the match. */
if (common->abort_label == NULL)
  add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL32) /* SIG_LESS */);
else
  JUMPTO(SLJIT_NOT_EQUAL32 /* SIG_LESS */, common->abort_label);
return cc + callout_length;
}
9699
/* The callout helper macros are no longer needed past this point.
NOTE(review): no definition of CALLOUT_ARG_SIZE is visible near this code;
its #undef may be a harmless leftover - confirm before removing. */
#undef CALLOUT_ARG_SIZE
#undef CALLOUT_ARG_OFFSET
9702
assert_needs_str_ptr_saving(PCRE2_SPTR cc)9703 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
9704 {
9705 while (TRUE)
9706 {
9707 switch (*cc)
9708 {
9709 case OP_CALLOUT_STR:
9710 cc += GET(cc, 1 + 2*LINK_SIZE);
9711 break;
9712
9713 case OP_NOT_WORD_BOUNDARY:
9714 case OP_WORD_BOUNDARY:
9715 case OP_CIRC:
9716 case OP_CIRCM:
9717 case OP_DOLL:
9718 case OP_DOLLM:
9719 case OP_CALLOUT:
9720 case OP_ALT:
9721 cc += PRIV(OP_lengths)[*cc];
9722 break;
9723
9724 case OP_KET:
9725 return FALSE;
9726
9727 default:
9728 return TRUE;
9729 }
9730 }
9731 }
9732
/* Emits the matching path of an assertion (an opcode in the range
OP_ASSERT .. OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO or
OP_BRAMINZERO.

Every alternative of the assertion is compiled in turn, together with its
backtracking path. Code is emitted around them to save STR_PTR (and, when
required, the control head and a full stack frame) before the assertion and
to restore them on both the success and the failure path.

Arguments:
  common       compiler state; its accept / quit / then_trap related fields
               are replaced while the assertion is compiled and restored
               before returning
  cc           points to the assertion opcode, or to the OP_BRAZERO /
               OP_BRAMINZERO in front of it
  backtrack    receives framesize and private_data_ptr; its jump lists and
               (for repeated assertions) matchingpath label are updated
  conditional  TRUE when the assertion is the condition of a conditional
               group: failures are collected in backtrack->condfailed
               instead of backtrack->common.topbacktracks

Returns:       the address following the final bracket's link field, or NULL
               when sljit reports a compiler error */
static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
{
DEFINE_COMPILER;
int framesize;
int extrasize;
BOOL local_quit_available = FALSE;
BOOL needs_control_head;
int private_data_ptr;
backtrack_common altbacktrack;
PCRE2_SPTR ccbegin;
PCRE2_UCHAR opcode;
PCRE2_UCHAR bra = OP_BRA;
jump_list *tmp = NULL;
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
jump_list **found;
/* Saving previous accept variables. */
BOOL save_local_quit_available = common->local_quit_available;
BOOL save_in_positive_assertion = common->in_positive_assertion;
then_trap_backtrack *save_then_trap = common->then_trap;
struct sljit_label *save_quit_label = common->quit_label;
struct sljit_label *save_accept_label = common->accept_label;
jump_list *save_quit = common->quit;
jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
jump_list *save_accept = common->accept;
struct sljit_jump *jump;
struct sljit_jump *brajump = NULL;

/* Assert captures then. */
common->then_trap = NULL;

if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  SLJIT_ASSERT(!conditional);
  bra = *cc;
  cc++;
  }
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
backtrack->framesize = framesize;
backtrack->private_data_ptr = private_data_ptr;
opcode = *cc;
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* A match inside a positive assertion jumps to the local 'tmp' list (bound
to the "Assert is successful" label below); a match inside a negative
assertion means the assertion failed, so it goes straight to 'target'. */
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
ccbegin = cc;
cc += GET(cc, 1);

if (bra == OP_BRAMINZERO)
  {
  /* This is a braminzero backtrack path. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (framesize < 0)
  {
  /* No stack frame is needed. At most STR_PTR (STACK(0)) and the control
  head (STACK(1)) are saved. */
  extrasize = 1;
  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
    extrasize = 0;

  if (needs_control_head)
    extrasize++;

  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  if (extrasize > 0)
    allocate_stack(common, extrasize);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);

  if (extrasize > 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    SLJIT_ASSERT(extrasize == 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
    }
  }
else
  {
  /* A stack frame is needed. Layout of the extra words after allocation:
       STACK(0)              STR_PTR
       STACK(1)              control head (only when needs_control_head)
       STACK(extrasize - 1)  previous value of private_data_ptr
  private_data_ptr is set to the value STACK_TOP had before allocation. */
  extrasize = needs_control_head ? 3 : 2;
  allocate_stack(common, framesize + extrasize);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    }
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);

  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
  }

memset(&altbacktrack, 0, sizeof(backtrack_common));
if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
  {
  /* Control verbs cannot escape from these asserts. */
  local_quit_available = TRUE;
  common->local_quit_available = TRUE;
  common->quit_label = NULL;
  common->quit = NULL;
  }

common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
common->positive_assertion_quit = NULL;

/* One iteration per alternative of the assertion. */
while (1)
  {
  common->accept_label = NULL;
  common->accept = NULL;
  altbacktrack.top = NULL;
  altbacktrack.topbacktracks = NULL;

  /* Every alternative after the first starts from the saved STR_PTR. */
  if (*ccbegin == OP_ALT && extrasize > 0)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  altbacktrack.cc = ccbegin;
  compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved compiler state before giving up. */
    if (local_quit_available)
      {
      common->local_quit_available = save_local_quit_available;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->in_positive_assertion = save_in_positive_assertion;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assertion_quit = save_positive_assertion_quit;
    common->accept = save_accept;
    return NULL;
    }
  common->accept_label = LABEL();
  if (common->accept != NULL)
    set_jumps(common->accept, common->accept_label);

  /* Reset stack. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    else if (extrasize > 0)
      free_stack(common, extrasize);

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    }
  else
    {
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
      }
    else
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
      }
    }

  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (conditional)
      {
      if (extrasize > 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
      }
    else if (bra == OP_BRAZERO)
      {
      if (framesize < 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
        }
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (framesize >= 0)
      {
      /* For OP_BRA and OP_BRAMINZERO. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
      }
    }
  /* The alternative matched: leave through the 'found' list. */
  add_jump(compiler, found, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved compiler state before giving up. */
    if (local_quit_available)
      {
      common->local_quit_available = save_local_quit_available;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->in_positive_assertion = save_in_positive_assertion;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assertion_quit = save_positive_assertion_quit;
    common->accept = save_accept;
    return NULL;
    }
  set_jumps(altbacktrack.topbacktracks, LABEL());

  if (*cc != OP_ALT)
    break;

  ccbegin = cc;
  cc += GET(cc, 1);
  }

if (local_quit_available)
  {
  SLJIT_ASSERT(common->positive_assertion_quit == NULL);
  /* Makes the check less complicated below. */
  common->positive_assertion_quit = common->quit;
  }

/* None of them matched. */
if (common->positive_assertion_quit != NULL)
  {
  /* Landing pad for jumps that quit the assertion: rebuild STACK_TOP from
  private_data_ptr. The normal fall-through path skips this code. */
  jump = JUMP(SLJIT_JUMP);
  set_jumps(common->positive_assertion_quit, LABEL());
  SLJIT_ASSERT(framesize != no_stack);
  if (framesize < 0)
    OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
    }
  JUMPHERE(jump);
  }

if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));

if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
  {
  /* Assert is failed. */
  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  if (framesize < 0)
    {
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  jump = JUMP(SLJIT_JUMP);
  if (bra != OP_BRAZERO)
    add_jump(compiler, target, jump);

  /* Assert is successful. */
  set_jumps(tmp, LABEL());
  if (framesize < 0)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));

    /* Keep the STR_PTR on the top of the stack. */
    if (bra == OP_BRAZERO)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      if (extrasize == 2)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else if (bra == OP_BRAMINZERO)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    }
  else
    {
    if (bra == OP_BRA)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
      }
    else
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
      if (extrasize == 2)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        if (bra == OP_BRAMINZERO)
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
        }
      else
        {
        /* The control head was saved as well, so STACK_TOP now addresses the
        control head slot and the saved STR_PTR lives one word below it.
        Loading from displacement 0 would fetch the control head value
        instead of the subject position. */
        SLJIT_ASSERT(extrasize == 3);
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
        }
      }
    }

  if (bra == OP_BRAZERO)
    {
    backtrack->matchingpath = LABEL();
    SET_LABEL(jump, backtrack->matchingpath);
    }
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    if (framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
      }
    set_jumps(backtrack->common.topbacktracks, LABEL());
    }
  }
else
  {
  /* AssertNot is successful. */
  if (framesize < 0)
    {
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

    if (bra != OP_BRA)
      {
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra != OP_BRA)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }

  if (bra == OP_BRAZERO)
    backtrack->matchingpath = LABEL();
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    }

  if (bra != OP_BRA)
    {
    SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
    set_jumps(backtrack->common.topbacktracks, LABEL());
    backtrack->common.topbacktracks = NULL;
    }
  }

/* Restore the compiler state that was replaced for the assertion. */
if (local_quit_available)
  {
  common->local_quit_available = save_local_quit_available;
  common->quit_label = save_quit_label;
  common->quit = save_quit;
  }
common->in_positive_assertion = save_in_positive_assertion;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
common->positive_assertion_quit = save_positive_assertion_quit;
common->accept = save_accept;
return cc + 1 + LINK_SIZE;
}
10158
match_once_common(compiler_common * common,PCRE2_UCHAR ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)10159 static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
10160 {
10161 DEFINE_COMPILER;
10162 int stacksize;
10163
10164 if (framesize < 0)
10165 {
10166 if (framesize == no_frame)
10167 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10168 else
10169 {
10170 stacksize = needs_control_head ? 1 : 0;
10171 if (ket != OP_KET || has_alternatives)
10172 stacksize++;
10173
10174 if (stacksize > 0)
10175 free_stack(common, stacksize);
10176 }
10177
10178 if (needs_control_head)
10179 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
10180
10181 /* TMP2 which is set here used by OP_KETRMAX below. */
10182 if (ket == OP_KETRMAX)
10183 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10184 else if (ket == OP_KETRMIN)
10185 {
10186 /* Move the STR_PTR to the private_data_ptr. */
10187 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10188 }
10189 }
10190 else
10191 {
10192 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
10193 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
10194 if (needs_control_head)
10195 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10196
10197 if (ket == OP_KETRMAX)
10198 {
10199 /* TMP2 which is set here used by OP_KETRMAX below. */
10200 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10201 }
10202 }
10203 if (needs_control_head)
10204 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10205 }
10206
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)10207 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
10208 {
10209 DEFINE_COMPILER;
10210
10211 if (common->capture_last_ptr != 0)
10212 {
10213 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
10214 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10215 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10216 stacksize++;
10217 }
10218 if (common->optimized_cbracket[offset >> 1] == 0)
10219 {
10220 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
10221 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
10222 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10223 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10224 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10225 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10226 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10227 stacksize += 2;
10228 }
10229 return stacksize;
10230 }
10231
do_script_run(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10232 static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10233 {
10234 if (PRIV(script_run)(ptr, endptr, FALSE))
10235 return endptr;
10236 return NULL;
10237 }
10238
10239 #ifdef SUPPORT_UNICODE
10240
do_script_run_utf(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10241 static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10242 {
10243 if (PRIV(script_run)(ptr, endptr, TRUE))
10244 return endptr;
10245 return NULL;
10246 }
10247
10248 #endif /* SUPPORT_UNICODE */
10249
match_script_run_common(compiler_common * common,int private_data_ptr,backtrack_common * parent)10250 static SLJIT_INLINE void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
10251 {
10252 DEFINE_COMPILER;
10253
10254 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
10255
10256 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10257 #ifdef SUPPORT_UNICODE
10258 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
10259 common->utf ? SLJIT_FUNC_OFFSET(do_script_run_utf) : SLJIT_FUNC_OFFSET(do_script_run));
10260 #else
10261 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_script_run));
10262 #endif
10263
10264 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
10265 add_jump(compiler, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
10266 }
10267
10268 /*
10269 Handling bracketed expressions is probably the most complex part.
10270
10271 Stack layout naming characters:
10272 S - Push the current STR_PTR
10273 0 - Push a 0 (NULL)
10274 A - Push the current STR_PTR. Needed for restoring the STR_PTR
10275 before the next alternative. Not pushed if there are no alternatives.
10276 M - Any values pushed by the current alternative. Can be empty, or anything.
10277 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
10278 L - Push the previous local (pointed by localptr) to the stack
() - optional values stored on the stack
()* - optional, can be stored multiple times
10281
10282 The following list shows the regular expression templates, their PCRE byte codes
10283 and stack layout supported by pcre-sljit.
10284
10285 (?:) OP_BRA | OP_KET A M
10286 () OP_CBRA | OP_KET C M
10287 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
10288 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
10289 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
10290 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
10291 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
10292 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
10293 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
10294 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
10295 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
10296 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
10297 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
10298 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
10299 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
10300 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
10301 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
10302 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
10303 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
10304 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
10305 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
10306 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
10307
10308
10309 Stack layout naming characters:
10310 A - Push the alternative index (starting from 0) on the stack.
Not pushed if there are no alternatives.
10312 M - Any values pushed by the current alternative. Can be empty, or anything.
10313
10314 The next list shows the possible content of a bracket:
10315 (|) OP_*BRA | OP_ALT ... M A
10316 (?()|) OP_*COND | OP_ALT M A
10317 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
10318 Or nothing, if trace is unnecessary
10319 */
10320
compile_bracket_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)10321 static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10322 {
10323 DEFINE_COMPILER;
10324 backtrack_common *backtrack;
10325 PCRE2_UCHAR opcode;
10326 int private_data_ptr = 0;
10327 int offset = 0;
10328 int i, stacksize;
10329 int repeat_ptr = 0, repeat_length = 0;
10330 int repeat_type = 0, repeat_count = 0;
10331 PCRE2_SPTR ccbegin;
10332 PCRE2_SPTR matchingpath;
10333 PCRE2_SPTR slot;
10334 PCRE2_UCHAR bra = OP_BRA;
10335 PCRE2_UCHAR ket;
10336 assert_backtrack *assert;
10337 BOOL has_alternatives;
10338 BOOL needs_control_head = FALSE;
10339 struct sljit_jump *jump;
10340 struct sljit_jump *skip;
10341 struct sljit_label *rmax_label = NULL;
10342 struct sljit_jump *braminzero = NULL;
10343
10344 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
10345
10346 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
10347 {
10348 bra = *cc;
10349 cc++;
10350 opcode = *cc;
10351 }
10352
10353 opcode = *cc;
10354 ccbegin = cc;
10355 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
10356 ket = *matchingpath;
10357 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
10358 {
10359 repeat_ptr = PRIVATE_DATA(matchingpath);
10360 repeat_length = PRIVATE_DATA(matchingpath + 1);
10361 repeat_type = PRIVATE_DATA(matchingpath + 2);
10362 repeat_count = PRIVATE_DATA(matchingpath + 3);
10363 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
10364 if (repeat_type == OP_UPTO)
10365 ket = OP_KETRMAX;
10366 if (repeat_type == OP_MINUPTO)
10367 ket = OP_KETRMIN;
10368 }
10369
10370 matchingpath = ccbegin + 1 + LINK_SIZE;
10371 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
10372 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
10373 cc += GET(cc, 1);
10374
10375 has_alternatives = *cc == OP_ALT;
10376 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
10377 {
10378 SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
10379 compile_time_checks_must_be_grouped_together);
10380 has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
10381 }
10382
10383 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
10384 opcode = OP_SCOND;
10385
10386 if (opcode == OP_CBRA || opcode == OP_SCBRA)
10387 {
10388 /* Capturing brackets has a pre-allocated space. */
10389 offset = GET2(ccbegin, 1 + LINK_SIZE);
10390 if (common->optimized_cbracket[offset] == 0)
10391 {
10392 private_data_ptr = OVECTOR_PRIV(offset);
10393 offset <<= 1;
10394 }
10395 else
10396 {
10397 offset <<= 1;
10398 private_data_ptr = OVECTOR(offset);
10399 }
10400 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10401 matchingpath += IMM2_SIZE;
10402 }
10403 else if (opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10404 {
10405 /* Other brackets simply allocate the next entry. */
10406 private_data_ptr = PRIVATE_DATA(ccbegin);
10407 SLJIT_ASSERT(private_data_ptr != 0);
10408 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10409 if (opcode == OP_ONCE)
10410 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
10411 }
10412
10413 /* Instructions before the first alternative. */
10414 stacksize = 0;
10415 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10416 stacksize++;
10417 if (bra == OP_BRAZERO)
10418 stacksize++;
10419
10420 if (stacksize > 0)
10421 allocate_stack(common, stacksize);
10422
10423 stacksize = 0;
10424 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10425 {
10426 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10427 stacksize++;
10428 }
10429
10430 if (bra == OP_BRAZERO)
10431 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10432
10433 if (bra == OP_BRAMINZERO)
10434 {
10435 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
10436 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10437 if (ket != OP_KETRMIN)
10438 {
10439 free_stack(common, 1);
10440 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10441 }
10442 else if (opcode == OP_ONCE || opcode >= OP_SBRA)
10443 {
10444 jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10445 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10446 /* Nothing stored during the first run. */
10447 skip = JUMP(SLJIT_JUMP);
10448 JUMPHERE(jump);
10449 /* Checking zero-length iteration. */
10450 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10451 {
10452 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
10453 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10454 }
10455 else
10456 {
10457 /* Except when the whole stack frame must be saved. */
10458 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10459 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
10460 }
10461 JUMPHERE(skip);
10462 }
10463 else
10464 {
10465 jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10466 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10467 JUMPHERE(jump);
10468 }
10469 }
10470
10471 if (repeat_type != 0)
10472 {
10473 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
10474 if (repeat_type == OP_EXACT)
10475 rmax_label = LABEL();
10476 }
10477
10478 if (ket == OP_KETRMIN)
10479 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10480
10481 if (ket == OP_KETRMAX)
10482 {
10483 rmax_label = LABEL();
10484 if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
10485 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
10486 }
10487
10488 /* Handling capturing brackets and alternatives. */
10489 if (opcode == OP_ONCE)
10490 {
10491 stacksize = 0;
10492 if (needs_control_head)
10493 {
10494 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10495 stacksize++;
10496 }
10497
10498 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10499 {
10500 /* Neither capturing brackets nor recursions are found in the block. */
10501 if (ket == OP_KETRMIN)
10502 {
10503 stacksize += 2;
10504 if (!needs_control_head)
10505 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10506 }
10507 else
10508 {
10509 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10510 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
10511 if (ket == OP_KETRMAX || has_alternatives)
10512 stacksize++;
10513 }
10514
10515 if (stacksize > 0)
10516 allocate_stack(common, stacksize);
10517
10518 stacksize = 0;
10519 if (needs_control_head)
10520 {
10521 stacksize++;
10522 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10523 }
10524
10525 if (ket == OP_KETRMIN)
10526 {
10527 if (needs_control_head)
10528 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10529 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10530 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10531 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
10532 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10533 }
10534 else if (ket == OP_KETRMAX || has_alternatives)
10535 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10536 }
10537 else
10538 {
10539 if (ket != OP_KET || has_alternatives)
10540 stacksize++;
10541
10542 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
10543 allocate_stack(common, stacksize);
10544
10545 if (needs_control_head)
10546 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10547
10548 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10549 OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10550
10551 stacksize = needs_control_head ? 1 : 0;
10552 if (ket != OP_KET || has_alternatives)
10553 {
10554 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10555 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10556 stacksize++;
10557 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10558 }
10559 else
10560 {
10561 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10562 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10563 }
10564 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
10565 }
10566 }
10567 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
10568 {
10569 /* Saving the previous values. */
10570 if (common->optimized_cbracket[offset >> 1] != 0)
10571 {
10572 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
10573 allocate_stack(common, 2);
10574 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10575 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
10576 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10577 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
10578 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
10579 }
10580 else
10581 {
10582 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10583 allocate_stack(common, 1);
10584 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10585 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10586 }
10587 }
10588 else if (opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10589 {
10590 /* Saving the previous value. */
10591 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10592 allocate_stack(common, 1);
10593 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10594 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10595 }
10596 else if (has_alternatives)
10597 {
10598 /* Pushing the starting string pointer. */
10599 allocate_stack(common, 1);
10600 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10601 }
10602
10603 /* Generating code for the first alternative. */
10604 if (opcode == OP_COND || opcode == OP_SCOND)
10605 {
10606 if (*matchingpath == OP_CREF)
10607 {
10608 SLJIT_ASSERT(has_alternatives);
10609 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
10610 CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
10611 matchingpath += 1 + IMM2_SIZE;
10612 }
10613 else if (*matchingpath == OP_DNCREF)
10614 {
10615 SLJIT_ASSERT(has_alternatives);
10616
10617 i = GET2(matchingpath, 1 + IMM2_SIZE);
10618 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10619 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
10620 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
10621 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10622 slot += common->name_entry_size;
10623 i--;
10624 while (i-- > 0)
10625 {
10626 OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10627 OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
10628 slot += common->name_entry_size;
10629 }
10630 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
10631 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
10632 matchingpath += 1 + 2 * IMM2_SIZE;
10633 }
10634 else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
10635 {
10636 /* Never has other case. */
10637 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
10638 SLJIT_ASSERT(!has_alternatives);
10639
10640 if (*matchingpath == OP_TRUE)
10641 {
10642 stacksize = 1;
10643 matchingpath++;
10644 }
10645 else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
10646 stacksize = 0;
10647 else if (*matchingpath == OP_RREF)
10648 {
10649 stacksize = GET2(matchingpath, 1);
10650 if (common->currententry == NULL)
10651 stacksize = 0;
10652 else if (stacksize == RREF_ANY)
10653 stacksize = 1;
10654 else if (common->currententry->start == 0)
10655 stacksize = stacksize == 0;
10656 else
10657 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10658
10659 if (stacksize != 0)
10660 matchingpath += 1 + IMM2_SIZE;
10661 }
10662 else
10663 {
10664 if (common->currententry == NULL || common->currententry->start == 0)
10665 stacksize = 0;
10666 else
10667 {
10668 stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
10669 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10670 i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10671 while (stacksize > 0)
10672 {
10673 if ((int)GET2(slot, 0) == i)
10674 break;
10675 slot += common->name_entry_size;
10676 stacksize--;
10677 }
10678 }
10679
10680 if (stacksize != 0)
10681 matchingpath += 1 + 2 * IMM2_SIZE;
10682 }
10683
10684 /* The stacksize == 0 is a common "else" case. */
10685 if (stacksize == 0)
10686 {
10687 if (*cc == OP_ALT)
10688 {
10689 matchingpath = cc + 1 + LINK_SIZE;
10690 cc += GET(cc, 1);
10691 }
10692 else
10693 matchingpath = cc;
10694 }
10695 }
10696 else
10697 {
10698 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
10699 /* Similar code as PUSH_BACKTRACK macro. */
10700 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
10701 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10702 return NULL;
10703 memset(assert, 0, sizeof(assert_backtrack));
10704 assert->common.cc = matchingpath;
10705 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
10706 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
10707 }
10708 }
10709
10710 compile_matchingpath(common, matchingpath, cc, backtrack);
10711 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10712 return NULL;
10713
10714 if (opcode == OP_ONCE)
10715 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
10716
10717 if (opcode == OP_SCRIPT_RUN)
10718 match_script_run_common(common, private_data_ptr, backtrack);
10719
10720 stacksize = 0;
10721 if (repeat_type == OP_MINUPTO)
10722 {
10723 /* We need to preserve the counter. TMP2 will be used below. */
10724 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10725 stacksize++;
10726 }
10727 if (ket != OP_KET || bra != OP_BRA)
10728 stacksize++;
10729 if (offset != 0)
10730 {
10731 if (common->capture_last_ptr != 0)
10732 stacksize++;
10733 if (common->optimized_cbracket[offset >> 1] == 0)
10734 stacksize += 2;
10735 }
10736 if (has_alternatives && opcode != OP_ONCE)
10737 stacksize++;
10738
10739 if (stacksize > 0)
10740 allocate_stack(common, stacksize);
10741
10742 stacksize = 0;
10743 if (repeat_type == OP_MINUPTO)
10744 {
10745 /* TMP2 was set above. */
10746 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
10747 stacksize++;
10748 }
10749
10750 if (ket != OP_KET || bra != OP_BRA)
10751 {
10752 if (ket != OP_KET)
10753 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10754 else
10755 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10756 stacksize++;
10757 }
10758
10759 if (offset != 0)
10760 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
10761
10762 if (has_alternatives)
10763 {
10764 if (opcode != OP_ONCE)
10765 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10766 if (ket != OP_KETRMAX)
10767 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10768 }
10769
10770 /* Must be after the matchingpath label. */
10771 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
10772 {
10773 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
10774 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10775 }
10776
10777 if (ket == OP_KETRMAX)
10778 {
10779 if (repeat_type != 0)
10780 {
10781 if (has_alternatives)
10782 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10783 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10784 JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10785 /* Drop STR_PTR for greedy plus quantifier. */
10786 if (opcode != OP_ONCE)
10787 free_stack(common, 1);
10788 }
10789 else if (opcode < OP_BRA || opcode >= OP_SBRA)
10790 {
10791 if (has_alternatives)
10792 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10793
10794 /* Checking zero-length iteration. */
10795 if (opcode != OP_ONCE)
10796 {
10797 /* This case includes opcodes such as OP_SCRIPT_RUN. */
10798 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
10799 /* Drop STR_PTR for greedy plus quantifier. */
10800 if (bra != OP_BRAZERO)
10801 free_stack(common, 1);
10802 }
10803 else
10804 /* TMP2 must contain the starting STR_PTR. */
10805 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
10806 }
10807 else
10808 JUMPTO(SLJIT_JUMP, rmax_label);
10809 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10810 }
10811
10812 if (repeat_type == OP_EXACT)
10813 {
10814 count_match(common);
10815 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10816 JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10817 }
10818 else if (repeat_type == OP_UPTO)
10819 {
10820 /* We need to preserve the counter. */
10821 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10822 allocate_stack(common, 1);
10823 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10824 }
10825
10826 if (bra == OP_BRAZERO)
10827 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
10828
10829 if (bra == OP_BRAMINZERO)
10830 {
10831 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
10832 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
10833 if (braminzero != NULL)
10834 {
10835 JUMPHERE(braminzero);
10836 /* We need to release the end pointer to perform the
10837 backtrack for the zero-length iteration. When
10838 framesize is < 0, OP_ONCE will do the release itself. */
10839 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
10840 {
10841 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10842 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10843 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
10844 }
10845 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
10846 free_stack(common, 1);
10847 }
10848 /* Continue to the normal backtrack. */
10849 }
10850
10851 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
10852 count_match(common);
10853
10854 /* Skip the other alternatives. */
10855 while (*cc == OP_ALT)
10856 cc += GET(cc, 1);
10857 cc += 1 + LINK_SIZE;
10858
10859 if (opcode == OP_ONCE)
10860 {
10861 /* We temporarily encode the needs_control_head in the lowest bit.
10862 Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
10863 the same value for small signed numbers (including negative numbers). */
10864 BACKTRACK_AS(bracket_backtrack)->u.framesize = (int)((unsigned)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
10865 }
10866 return cc + repeat_length;
10867 }
10868
/* Generates the matching path for the OP_BRAPOS, OP_SBRAPOS, OP_CBRAPOS and
OP_SCBRAPOS brackets. A leading OP_BRAPOSZERO is consumed first and recorded
in the "zero" flag.

Each alternative of the bracket is compiled in turn. After an alternative
matches, the emitted code records the current STR_PTR (and, for the capturing
variants, updates the ovector pair) and jumps back to the "loop" label. The
S-variants additionally leave through the "emptymatch" list when the previously
recorded position equals STR_PTR.

Unless "zero" is set, one stack slot holds a flag that is initialised to 1 and
cleared to 0 after every successful iteration; when all alternatives fail and
the flag is still non-zero, the code jumps to backtrack->topbacktracks.

Arguments:
  common    compiler state
  cc        points to the bracket opcode (or to a preceding OP_BRAPOSZERO)
  parent    not referenced directly here; presumably consumed by the
              PUSH_BACKTRACK macro (confirm against its definition)

Returns:    the code pointer just after the closing OP_KETRPOS, or NULL when
            the sljit compiler reports an error
*/
static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode;
int private_data_ptr;
int cbraprivptr = 0;
BOOL needs_control_head;
int framesize;
int stacksize;
int offset = 0;
BOOL zero = FALSE;
PCRE2_SPTR ccbegin = NULL;
int stack; /* Also contains the offset of control head. */
struct sljit_label *loop = NULL;
struct jump_list *emptymatch = NULL;

PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
if (*cc == OP_BRAPOSZERO)
  {
  zero = TRUE;
  cc++;
  }

opcode = *cc;
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
switch(opcode)
  {
  case OP_BRAPOS:
  case OP_SBRAPOS:
  ccbegin = cc + 1 + LINK_SIZE;
  break;

  case OP_CBRAPOS:
  case OP_SCBRAPOS:
  offset = GET2(cc, 1 + LINK_SIZE);
  /* This case cannot be optimized in the same way as
  normal capturing brackets. */
  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
  cbraprivptr = OVECTOR_PRIV(offset);
  /* From here on "offset" is the ovector index of the capture start. */
  offset <<= 1;
  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
if (framesize < 0)
  {
  /* No frame is needed. Slots, from STACK(0) upwards:
     - capturing: saved ovector pair, then capture_last (if tracked);
       non-capturing: the starting STR_PTR;
     - the control head (if needed);
     - the "no iteration matched yet" flag (unless zero). */
  if (offset != 0)
    {
    stacksize = 2;
    if (common->capture_last_ptr != 0)
      stacksize++;
    }
  else
    stacksize = 1;

  if (needs_control_head)
    stacksize++;
  if (!zero)
    stacksize++;

  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  allocate_stack(common, stacksize);
  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  stack = 0;
  if (offset != 0)
    {
    stack = 2;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    if (common->capture_last_ptr != 0)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
      stack = 3;
      }
    }
  else
    {
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    stack = 1;
    }

  /* The flag goes into the slot after the control head; afterwards "stack"
  is stepped back so that it indexes the control head slot. */
  if (needs_control_head)
    stack++;
  if (!zero)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
  if (needs_control_head)
    {
    stack--;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    }
  }
else
  {
  /* A frame is needed. Slots, from STACK(0) upwards:
     - the "no iteration matched yet" flag (unless zero);
     - the control head (if needed);
     - the starting STR_PTR (non-capturing only);
     - the previous private_data_ptr value;
     - the frame written by init_frame(). */
  stacksize = framesize + 1;
  if (!zero)
    stacksize++;
  if (needs_control_head)
    stacksize++;
  if (offset == 0)
    stacksize++;
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;

  allocate_stack(common, stacksize);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

  stack = 0;
  if (!zero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
    stack = 1;
    }
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    stack++;
    }
  if (offset == 0)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
    stack++;
    }
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
  /* Step "stack" back to the control head slot. */
  stack -= 1 + (offset == 0);
  }

if (offset != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);

loop = LABEL();
while (*cc != OP_KETRPOS)
  {
  backtrack->top = NULL;
  backtrack->topbacktracks = NULL;
  cc += GET(cc, 1);

  compile_matchingpath(common, ccbegin, cc, backtrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  /* The alternative matched: record the new position and iterate again. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

    if (offset != 0)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      /* TMP1 (the previous position) is only needed for the empty
      iteration check below. */
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
    }
  else
    {
    if (offset != 0)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* framesize >= 0 on this path, so the flag always lives in STACK(0). */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    }

  JUMPTO(SLJIT_JUMP, loop);
  flush_stubs(common);

  compile_backtrackingpath(common, backtrack->top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;
  set_jumps(backtrack->topbacktracks, LABEL());

  /* The alternative failed: reload STR_PTR from the last recorded position
  before trying the next alternative (or finishing). */
  if (framesize < 0)
    {
    if (offset != 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
    else
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    }
  else
    {
    if (offset != 0)
      {
      /* Last alternative. */
      if (*cc == OP_KETRPOS)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      }
    }

  if (*cc == OP_KETRPOS)
    break;
  ccbegin = cc + 1 + LINK_SIZE;
  }

/* We don't have to restore the control head in case of a failed match. */

backtrack->topbacktracks = NULL;
if (!zero)
  {
  /* A flag that is still non-zero means no iteration has matched. */
  if (framesize < 0)
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
  else /* TMP2 is set to [private_data_ptr] above. */
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
  }

/* None of them matched. */
set_jumps(emptymatch, LABEL());
count_match(common);
return cc + 1 + LINK_SIZE;
}
11151
get_iterator_parameters(compiler_common * common,PCRE2_SPTR cc,PCRE2_UCHAR * opcode,PCRE2_UCHAR * type,sljit_u32 * max,sljit_u32 * exact,PCRE2_SPTR * end)11152 static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
11153 {
11154 int class_len;
11155
11156 *opcode = *cc;
11157 *exact = 0;
11158
11159 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
11160 {
11161 cc++;
11162 *type = OP_CHAR;
11163 }
11164 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
11165 {
11166 cc++;
11167 *type = OP_CHARI;
11168 *opcode -= OP_STARI - OP_STAR;
11169 }
11170 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
11171 {
11172 cc++;
11173 *type = OP_NOT;
11174 *opcode -= OP_NOTSTAR - OP_STAR;
11175 }
11176 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
11177 {
11178 cc++;
11179 *type = OP_NOTI;
11180 *opcode -= OP_NOTSTARI - OP_STAR;
11181 }
11182 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
11183 {
11184 cc++;
11185 *opcode -= OP_TYPESTAR - OP_STAR;
11186 *type = OP_END;
11187 }
11188 else
11189 {
11190 SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
11191 *type = *opcode;
11192 cc++;
11193 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
11194 *opcode = cc[class_len - 1];
11195
11196 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
11197 {
11198 *opcode -= OP_CRSTAR - OP_STAR;
11199 *end = cc + class_len;
11200
11201 if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
11202 {
11203 *exact = 1;
11204 *opcode -= OP_PLUS - OP_STAR;
11205 }
11206 }
11207 else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
11208 {
11209 *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
11210 *end = cc + class_len;
11211
11212 if (*opcode == OP_POSPLUS)
11213 {
11214 *exact = 1;
11215 *opcode = OP_POSSTAR;
11216 }
11217 }
11218 else
11219 {
11220 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
11221 *max = GET2(cc, (class_len + IMM2_SIZE));
11222 *exact = GET2(cc, class_len);
11223
11224 if (*max == 0)
11225 {
11226 if (*opcode == OP_CRPOSRANGE)
11227 *opcode = OP_POSSTAR;
11228 else
11229 *opcode -= OP_CRRANGE - OP_STAR;
11230 }
11231 else
11232 {
11233 *max -= *exact;
11234 if (*max == 0)
11235 *opcode = OP_EXACT;
11236 else if (*max == 1)
11237 {
11238 if (*opcode == OP_CRPOSRANGE)
11239 *opcode = OP_POSQUERY;
11240 else
11241 *opcode -= OP_CRRANGE - OP_QUERY;
11242 }
11243 else
11244 {
11245 if (*opcode == OP_CRPOSRANGE)
11246 *opcode = OP_POSUPTO;
11247 else
11248 *opcode -= OP_CRRANGE - OP_UPTO;
11249 }
11250 }
11251 *end = cc + class_len + 2 * IMM2_SIZE;
11252 }
11253 return cc;
11254 }
11255
11256 switch(*opcode)
11257 {
11258 case OP_EXACT:
11259 *exact = GET2(cc, 0);
11260 cc += IMM2_SIZE;
11261 break;
11262
11263 case OP_PLUS:
11264 case OP_MINPLUS:
11265 *exact = 1;
11266 *opcode -= OP_PLUS - OP_STAR;
11267 break;
11268
11269 case OP_POSPLUS:
11270 *exact = 1;
11271 *opcode = OP_POSSTAR;
11272 break;
11273
11274 case OP_UPTO:
11275 case OP_MINUPTO:
11276 case OP_POSUPTO:
11277 *max = GET2(cc, 0);
11278 cc += IMM2_SIZE;
11279 break;
11280 }
11281
11282 if (*type == OP_END)
11283 {
11284 *type = *cc;
11285 *end = next_opcode(common, cc);
11286 cc++;
11287 return cc;
11288 }
11289
11290 *end = cc + 1;
11291 #ifdef SUPPORT_UNICODE
11292 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
11293 #endif
11294 return cc;
11295 }
11296
compile_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11297 static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11298 {
11299 DEFINE_COMPILER;
11300 backtrack_common *backtrack;
11301 PCRE2_UCHAR opcode;
11302 PCRE2_UCHAR type;
11303 sljit_u32 max = 0, exact;
11304 BOOL fast_fail;
11305 sljit_s32 fast_str_ptr;
11306 BOOL charpos_enabled;
11307 PCRE2_UCHAR charpos_char;
11308 unsigned int charpos_othercasebit;
11309 PCRE2_SPTR end;
11310 jump_list *no_match = NULL;
11311 jump_list *no_char1_match = NULL;
11312 struct sljit_jump *jump = NULL;
11313 struct sljit_label *label;
11314 int private_data_ptr = PRIVATE_DATA(cc);
11315 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
11316 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
11317 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
11318 int tmp_base, tmp_offset;
11319
11320 PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
11321
11322 fast_str_ptr = PRIVATE_DATA(cc + 1);
11323 fast_fail = TRUE;
11324
11325 SLJIT_ASSERT(common->fast_forward_bc_ptr == NULL || fast_str_ptr == 0 || cc == common->fast_forward_bc_ptr);
11326
11327 if (cc == common->fast_forward_bc_ptr)
11328 fast_fail = FALSE;
11329 else if (common->fast_fail_start_ptr == 0)
11330 fast_str_ptr = 0;
11331
11332 SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || fast_str_ptr == 0
11333 || (fast_str_ptr >= common->fast_fail_start_ptr && fast_str_ptr <= common->fast_fail_end_ptr));
11334
11335 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
11336
11337 if (type != OP_EXTUNI)
11338 {
11339 tmp_base = TMP3;
11340 tmp_offset = 0;
11341 }
11342 else
11343 {
11344 tmp_base = SLJIT_MEM1(SLJIT_SP);
11345 tmp_offset = POSSESSIVE0;
11346 }
11347
11348 if (fast_fail && fast_str_ptr != 0)
11349 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr));
11350
11351 /* Handle fixed part first. */
11352 if (exact > 1)
11353 {
11354 SLJIT_ASSERT(fast_str_ptr == 0);
11355 if (common->mode == PCRE2_JIT_COMPLETE
11356 #ifdef SUPPORT_UNICODE
11357 && !common->utf
11358 #endif
11359 && type != OP_ANYNL && type != OP_EXTUNI)
11360 {
11361 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
11362 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
11363 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11364 label = LABEL();
11365 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11366 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11367 JUMPTO(SLJIT_NOT_ZERO, label);
11368 }
11369 else
11370 {
11371 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11372 label = LABEL();
11373 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11374 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11375 JUMPTO(SLJIT_NOT_ZERO, label);
11376 }
11377 }
11378 else if (exact == 1)
11379 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11380
11381 switch(opcode)
11382 {
11383 case OP_STAR:
11384 case OP_UPTO:
11385 SLJIT_ASSERT(fast_str_ptr == 0 || opcode == OP_STAR);
11386
11387 if (type == OP_ANYNL || type == OP_EXTUNI)
11388 {
11389 SLJIT_ASSERT(private_data_ptr == 0);
11390 SLJIT_ASSERT(fast_str_ptr == 0);
11391
11392 allocate_stack(common, 2);
11393 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11394 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
11395
11396 if (opcode == OP_UPTO)
11397 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
11398
11399 label = LABEL();
11400 compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11401 if (opcode == OP_UPTO)
11402 {
11403 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
11404 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11405 jump = JUMP(SLJIT_ZERO);
11406 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
11407 }
11408
11409 /* We cannot use TMP3 because of this allocate_stack. */
11410 allocate_stack(common, 1);
11411 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11412 JUMPTO(SLJIT_JUMP, label);
11413 if (jump != NULL)
11414 JUMPHERE(jump);
11415 }
11416 else
11417 {
11418 charpos_enabled = FALSE;
11419 charpos_char = 0;
11420 charpos_othercasebit = 0;
11421
11422 if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
11423 {
11424 charpos_enabled = TRUE;
11425 #ifdef SUPPORT_UNICODE
11426 charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
11427 #endif
11428 if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
11429 {
11430 charpos_othercasebit = char_get_othercase_bit(common, end + 1);
11431 if (charpos_othercasebit == 0)
11432 charpos_enabled = FALSE;
11433 }
11434
11435 if (charpos_enabled)
11436 {
11437 charpos_char = end[1];
11438 /* Consumpe the OP_CHAR opcode. */
11439 end += 2;
11440 #if PCRE2_CODE_UNIT_WIDTH == 8
11441 SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
11442 #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
11443 SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
11444 if ((charpos_othercasebit & 0x100) != 0)
11445 charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
11446 #endif
11447 if (charpos_othercasebit != 0)
11448 charpos_char |= charpos_othercasebit;
11449
11450 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
11451 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
11452 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
11453 }
11454 }
11455
11456 if (charpos_enabled)
11457 {
11458 if (opcode == OP_UPTO)
11459 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
11460
11461 /* Search the first instance of charpos_char. */
11462 jump = JUMP(SLJIT_JUMP);
11463 label = LABEL();
11464 if (opcode == OP_UPTO)
11465 {
11466 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11467 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
11468 }
11469 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11470 if (fast_str_ptr != 0)
11471 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
11472 JUMPHERE(jump);
11473
11474 detect_partial_match(common, &backtrack->topbacktracks);
11475 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11476 if (charpos_othercasebit != 0)
11477 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11478 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11479
11480 if (private_data_ptr == 0)
11481 allocate_stack(common, 2);
11482 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11483 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11484 if (opcode == OP_UPTO)
11485 {
11486 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11487 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11488 }
11489
11490 /* Search the last instance of charpos_char. */
11491 label = LABEL();
11492 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11493 if (fast_str_ptr != 0)
11494 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
11495 detect_partial_match(common, &no_match);
11496 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11497 if (charpos_othercasebit != 0)
11498 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11499 if (opcode == OP_STAR)
11500 {
11501 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11502 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11503 }
11504 else
11505 {
11506 jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
11507 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11508 JUMPHERE(jump);
11509 }
11510
11511 if (opcode == OP_UPTO)
11512 {
11513 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11514 JUMPTO(SLJIT_NOT_ZERO, label);
11515 }
11516 else
11517 JUMPTO(SLJIT_JUMP, label);
11518
11519 set_jumps(no_match, LABEL());
11520 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11521 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11522 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11523 }
11524 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11525 else if (common->utf)
11526 {
11527 if (private_data_ptr == 0)
11528 allocate_stack(common, 2);
11529
11530 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11531 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11532
11533 if (opcode == OP_UPTO)
11534 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11535
11536 label = LABEL();
11537 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11538 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11539
11540 if (opcode == OP_UPTO)
11541 {
11542 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11543 JUMPTO(SLJIT_NOT_ZERO, label);
11544 }
11545 else
11546 JUMPTO(SLJIT_JUMP, label);
11547
11548 set_jumps(no_match, LABEL());
11549 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11550 if (fast_str_ptr != 0)
11551 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
11552 }
11553 #endif
11554 else
11555 {
11556 if (private_data_ptr == 0)
11557 allocate_stack(common, 2);
11558
11559 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11560 if (opcode == OP_UPTO)
11561 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11562
11563 label = LABEL();
11564 detect_partial_match(common, &no_match);
11565 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11566 if (opcode == OP_UPTO)
11567 {
11568 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11569 JUMPTO(SLJIT_NOT_ZERO, label);
11570 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11571 }
11572 else
11573 JUMPTO(SLJIT_JUMP, label);
11574
11575 set_jumps(no_char1_match, LABEL());
11576 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11577 set_jumps(no_match, LABEL());
11578 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11579 if (fast_str_ptr != 0)
11580 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
11581 }
11582 }
11583 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11584 break;
11585
11586 case OP_MINSTAR:
11587 if (private_data_ptr == 0)
11588 allocate_stack(common, 1);
11589 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11590 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11591 if (fast_str_ptr != 0)
11592 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
11593 break;
11594
11595 case OP_MINUPTO:
11596 SLJIT_ASSERT(fast_str_ptr == 0);
11597 if (private_data_ptr == 0)
11598 allocate_stack(common, 2);
11599 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11600 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
11601 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11602 break;
11603
11604 case OP_QUERY:
11605 case OP_MINQUERY:
11606 SLJIT_ASSERT(fast_str_ptr == 0);
11607 if (private_data_ptr == 0)
11608 allocate_stack(common, 1);
11609 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11610 if (opcode == OP_QUERY)
11611 compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11612 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11613 break;
11614
11615 case OP_EXACT:
11616 break;
11617
11618 case OP_POSSTAR:
11619 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11620 if (common->utf)
11621 {
11622 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11623 label = LABEL();
11624 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11625 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11626 JUMPTO(SLJIT_JUMP, label);
11627 set_jumps(no_match, LABEL());
11628 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11629 if (fast_str_ptr != 0)
11630 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
11631 break;
11632 }
11633 #endif
11634 label = LABEL();
11635 detect_partial_match(common, &no_match);
11636 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11637 JUMPTO(SLJIT_JUMP, label);
11638 set_jumps(no_char1_match, LABEL());
11639 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11640 set_jumps(no_match, LABEL());
11641 if (fast_str_ptr != 0)
11642 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
11643 break;
11644
11645 case OP_POSUPTO:
11646 SLJIT_ASSERT(fast_str_ptr == 0);
11647 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11648 if (common->utf)
11649 {
11650 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11651 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11652 label = LABEL();
11653 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11654 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11655 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11656 JUMPTO(SLJIT_NOT_ZERO, label);
11657 set_jumps(no_match, LABEL());
11658 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
11659 break;
11660 }
11661 #endif
11662 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11663 label = LABEL();
11664 detect_partial_match(common, &no_match);
11665 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11666 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11667 JUMPTO(SLJIT_NOT_ZERO, label);
11668 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11669 set_jumps(no_char1_match, LABEL());
11670 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11671 set_jumps(no_match, LABEL());
11672 break;
11673
11674 case OP_POSQUERY:
11675 SLJIT_ASSERT(fast_str_ptr == 0);
11676 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11677 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11678 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11679 set_jumps(no_match, LABEL());
11680 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11681 break;
11682
11683 default:
11684 SLJIT_UNREACHABLE();
11685 break;
11686 }
11687
11688 count_match(common);
11689 return end;
11690 }
11691
compile_fail_accept_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11692 static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11693 {
11694 DEFINE_COMPILER;
11695 backtrack_common *backtrack;
11696
11697 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11698
11699 if (*cc == OP_FAIL)
11700 {
11701 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11702 return cc + 1;
11703 }
11704
11705 if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
11706 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
11707
11708 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
11709 {
11710 /* No need to check notempty conditions. */
11711 if (common->accept_label == NULL)
11712 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
11713 else
11714 JUMPTO(SLJIT_JUMP, common->accept_label);
11715 return cc + 1;
11716 }
11717
11718 if (common->accept_label == NULL)
11719 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
11720 else
11721 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
11722 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11723 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
11724 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
11725 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO));
11726 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
11727 if (common->accept_label == NULL)
11728 add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
11729 else
11730 JUMPTO(SLJIT_ZERO, common->accept_label);
11731 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
11732 if (common->accept_label == NULL)
11733 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
11734 else
11735 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
11736 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11737 return cc + 1;
11738 }
11739
compile_close_matchingpath(compiler_common * common,PCRE2_SPTR cc)11740 static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
11741 {
11742 DEFINE_COMPILER;
11743 int offset = GET2(cc, 1);
11744 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
11745
11746 /* Data will be discarded anyway... */
11747 if (common->currententry != NULL)
11748 return cc + 1 + IMM2_SIZE;
11749
11750 if (!optimized_cbracket)
11751 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
11752 offset <<= 1;
11753 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11754 if (!optimized_cbracket)
11755 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11756 return cc + 1 + IMM2_SIZE;
11757 }
11758
compile_control_verb_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11759 static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11760 {
11761 DEFINE_COMPILER;
11762 backtrack_common *backtrack;
11763 PCRE2_UCHAR opcode = *cc;
11764 PCRE2_SPTR ccend = cc + 1;
11765
11766 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
11767 opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
11768 ccend += 2 + cc[1];
11769
11770 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11771
11772 if (opcode == OP_SKIP)
11773 {
11774 allocate_stack(common, 1);
11775 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11776 return ccend;
11777 }
11778
11779 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
11780 {
11781 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11782 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
11783 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
11784 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
11785 }
11786
11787 return ccend;
11788 }
11789
/* One-element opcode buffer holding OP_THEN_TRAP. The then-trap backtrack
records created in this file point their common.cc member at this buffer
instead of at a position inside the compiled pattern. */
static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
11791
compile_then_trap_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)11792 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
11793 {
11794 DEFINE_COMPILER;
11795 backtrack_common *backtrack;
11796 BOOL needs_control_head;
11797 int size;
11798
11799 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
11800 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
11801 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
11802 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
11803 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
11804
11805 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
11806 size = 3 + (size < 0 ? 0 : size);
11807
11808 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11809 allocate_stack(common, size);
11810 if (size > 3)
11811 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
11812 else
11813 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
11814 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
11815 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
11816 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
11817
11818 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
11819 if (size >= 0)
11820 init_frame(common, cc, ccend, size - 1, 0);
11821 }
11822
/* Generates the matching path for the opcode sequence [cc, ccend).

The function walks the sequence opcode by opcode and dispatches each one to
the generator responsible for it. Most generators return the address of the
next opcode; the loop stops early when one of them returns NULL. Simple
items receive the list that their failure jumps are appended to: the
nextbacktracks list of the most recently pushed backtrack record of parent
or, when no record has been pushed yet, parent->topbacktracks.

When common->has_then is set and common->then_offsets is non-zero for the
starting offset, the whole sequence is bracketed by two then-trap backtrack
records: one pushed before the first opcode and one after the last.

ccend must point at OP_END or at an opcode in the range OP_ALT..OP_KETRPOS
(asserted). */

static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
BOOL has_then_trap = FALSE;
then_trap_backtrack *save_then_trap = NULL;

SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));

if (common->has_then && common->then_offsets[cc - common->start] != 0)
  {
  SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
  has_then_trap = TRUE;
  /* common->then_trap is overwritten by the call below; it is restored
  once the whole sequence has been compiled. */
  save_then_trap = common->then_trap;
  /* Tail item on backtrack. */
  compile_then_trap_matchingpath(common, cc, ccend, parent);
  }

while (cc < ccend)
  {
  switch(*cc)
    {
    /* Assertions handled by compile_simple_assertion_matchingpath(). */
    case OP_SOD:
    case OP_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_REVERSE:
    cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    break;

    /* Non-repeated single item matchers. */
    case OP_NOT_DIGIT:
    case OP_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
    case OP_ANYBYTE:
    case OP_NOTPROP:
    case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE:
    case OP_HSPACE:
    case OP_NOT_VSPACE:
    case OP_VSPACE:
    case OP_EXTUNI:
    case OP_NOT:
    case OP_NOTI:
    cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;

    case OP_SET_SOM:
    /* Save the old OVECTOR(0) value in a new stack slot and replace it
    with the current subject position. */
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    cc++;
    break;

    case OP_CHAR:
    case OP_CHARI:
    /* compile_charn_matchingpath() is only used in PCRE2_JIT_COMPLETE
    mode; the other modes go through the single item generator. */
    if (common->mode == PCRE2_JIT_COMPLETE)
      cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;

    /* Repeated single items. */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    cc = compile_iterator_matchingpath(common, cc, parent);
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* The code unit that follows the 32 bytes of class data tells whether
    the class is repeated (OP_CRSTAR..OP_CRPOSRANGE). */
    if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    case OP_XCLASS:
    /* GET(cc, 1) is the distance to the code unit following the extended
    class, where a repeat opcode may be found. */
    if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;
#endif

    case OP_REF:
    case OP_REFI:
    /* A repeat opcode directly after the operand selects the iterator
    form of the reference. */
    if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
      cc += 1 + IMM2_SIZE;
      }
    break;

    case OP_DNREF:
    case OP_DNREFI:
    /* Same as above, but the opcode carries two operands and the
    non-repeated form needs compile_dnref_search() first. */
    if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
      cc += 1 + 2 * IMM2_SIZE;
      }
    break;

    case OP_RECURSE:
    cc = compile_recurse_matchingpath(common, cc, parent);
    break;

    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    cc = compile_callout_matchingpath(common, cc, parent);
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
    cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
    break;

    case OP_BRAMINZERO:
    /* Only the backtrack record and the saved position are produced here;
    the bracket itself is skipped with bracketend(). */
    PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
    cc = bracketend(cc + 1);
    if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
      {
      /* One slot: the current subject position. */
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else
      {
      /* Bracket closed by OP_KETRMIN: two slots, a zero and the current
      subject position. */
      allocate_stack(common, 2);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
      }
    BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
    count_match(common);
    break;

    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    cc = compile_bracket_matchingpath(common, cc, parent);
    break;

    case OP_BRAZERO:
    /* The opcode after OP_BRAZERO decides between the bracket generator
    and the assertion generator. */
    if (cc[1] > OP_ASSERTBACK_NOT)
      cc = compile_bracket_matchingpath(common, cc, parent);
    else
      {
      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
      }
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    cc = compile_bracketpos_matchingpath(common, cc, parent);
    break;

    case OP_MARK:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    SLJIT_ASSERT(common->mark_ptr != 0);
    /* TMP2 carries the previous mark across the stack allocation. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
    allocate_stack(common, common->has_skip_arg ? 5 : 1);
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    /* The previous mark goes into the last of the allocated slots. */
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
    /* cc + 2 becomes the new mark, both in the local slot and in the
    mark_ptr field of jit_arguments. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
    if (common->has_skip_arg)
      {
      /* Make STACK_TOP the new control head and fill the record found
      there: previous head, type_mark tag, mark address, subject
      position. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
      }
    /* Skip the opcode, two further code units and cc[1] argument units. */
    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    case OP_THEN:
    case OP_THEN_ARG:
    case OP_COMMIT:
    case OP_COMMIT_ARG:
    cc = compile_control_verb_matchingpath(common, cc, parent);
    break;

    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    cc = compile_fail_accept_matchingpath(common, cc, parent);
    break;

    case OP_CLOSE:
    cc = compile_close_matchingpath(common, cc);
    break;

    case OP_SKIPZERO:
    /* No code is generated for the bracket that follows. */
    cc = bracketend(cc + 1);
    break;

    default:
    SLJIT_UNREACHABLE();
    return;
    }
  /* A generator returned NULL: stop compiling this sequence. */
  if (cc == NULL)
    return;
  }

if (has_then_trap)
  {
  /* Head item on backtrack. */
  PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
  BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
  BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
  common->then_trap = save_then_trap;
  }
SLJIT_ASSERT(cc == ccend);
}
12135
/* From this point on PUSH_BACKTRACK, PUSH_BACKTRACK_NOVALUE and BACKTRACK_AS
are no longer defined. */
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Runs compile_backtrackingpath() on the given backtrack record and returns
from the enclosing function (without a value) as soon as the sljit compiler
reports an error. The variables "common" and "compiler" must be in scope at
the point of use. */
#define COMPILE_BACKTRACKINGPATH(current) \
  do \
    { \
    compile_backtrackingpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Casts the variable named "current" to a pointer to the given backtrack
record type. */
#define CURRENT_AS(type) ((type *)current)
12150
/* Generates the backtracking path of a repeated single item.

The repeat kind (opcode), the item type and the limits are decoded again
from current->cc by get_iterator_parameters(). The values saved by the
matching path are reached through base/offset0/offset1: they are on the
backtrack stack (STACK(0) and STACK(1)) when the item has no private data
slot, otherwise in the private data slot and the machine word after it.
Stack slots are released only in the former case.

Each repeat kind either jumps back to the stored matchingpath label to retry
with a different repeat count, or falls through to the end of the function,
where the jumps collected in current->topbacktracks are bound as well. */

static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
PCRE2_SPTR cc = current->cc;
PCRE2_UCHAR opcode;
PCRE2_UCHAR type;
sljit_u32 max = 0, exact;
struct sljit_label *label = NULL;
struct sljit_jump *jump = NULL;
jump_list *jumplist = NULL;
PCRE2_SPTR end;
int private_data_ptr = PRIVATE_DATA(cc);
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

switch(opcode)
  {
  case OP_STAR:
  case OP_UPTO:
  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    /* These types keep one saved subject position per stack slot: pop
    the next one and retry unless the popped value is zero. */
    SLJIT_ASSERT(private_data_ptr == 0);
    set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
    }
  else
    {
    if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
      {
      /* Scan backwards from the saved position (offset0), not going
      below the limit kept in offset1 (held in TMP2), for a position that
      is preceded by the recorded character. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      OP1(SLJIT_MOV, TMP2, 0, base, offset1);
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
      label = LABEL();
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      /* Apply the recorded other-case bit before comparing, if any. */
      if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
      move_back(common, NULL, TRUE);
      CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
      }
    else
      {
      /* Give back one item: step back from the saved position and
      retry, unless the limit kept in offset1 has been reached. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
      move_back(common, NULL, TRUE);
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
      }
    /* Nothing left to give back. */
    JUMPHERE(jump);
    if (private_data_ptr == 0)
      free_stack(common, 2);
    }
  break;

  case OP_MINSTAR:
  /* Match one more item from the saved position; on success store the
  new position and retry, on failure fall through. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  set_jumps(jumplist, LABEL());
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  case OP_MINUPTO:
  /* As OP_MINSTAR, but offset1 holds a counter that is decremented
  first; reaching zero ends the retries. */
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));

  OP1(SLJIT_MOV, base, offset1, TMP1, 0);
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);

  set_jumps(jumplist, LABEL());
  if (private_data_ptr == 0)
    free_stack(common, 2);
  break;

  case OP_QUERY:
  /* Reached by backtracking: reload the saved position, clear the slot
  and retry if the loaded value is not zero. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  jump = JUMP(SLJIT_JUMP);
  /* Reached through the u.backtracks jumps: restore the saved position,
  clear the slot and continue on the matching path. */
  set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  JUMPHERE(jump);
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  case OP_MINQUERY:
  /* Reload the saved position and clear the slot; a zero value means
  there is nothing left to try, otherwise match the item once. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  set_jumps(jumplist, LABEL());
  JUMPHERE(jump);
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  /* No backtracking code is generated for these repeat kinds. */
  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSQUERY:
  case OP_POSUPTO:
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

set_jumps(current->topbacktracks, LABEL());
}
12278
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)12279 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12280 {
12281 DEFINE_COMPILER;
12282 PCRE2_SPTR cc = current->cc;
12283 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
12284 PCRE2_UCHAR type;
12285
12286 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
12287
12288 if ((type & 0x1) == 0)
12289 {
12290 /* Maximize case. */
12291 set_jumps(current->topbacktracks, LABEL());
12292 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12293 free_stack(common, 1);
12294 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12295 return;
12296 }
12297
12298 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12299 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12300 set_jumps(current->topbacktracks, LABEL());
12301 free_stack(common, ref ? 2 : 3);
12302 }
12303
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)12304 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12305 {
12306 DEFINE_COMPILER;
12307 recurse_entry *entry;
12308
12309 if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
12310 {
12311 entry = CURRENT_AS(recurse_backtrack)->entry;
12312 if (entry->backtrack_label == NULL)
12313 add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
12314 else
12315 JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
12316 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
12317 }
12318 else
12319 compile_backtrackingpath(common, current->top);
12320
12321 set_jumps(current->topbacktracks, LABEL());
12322 }
12323
/* Generates the backtracking code of an assertion, which may be prefixed by
OP_BRAZERO (OP_BRAMINZERO must not reach this function).

  common   shared compiler state
  current  backtrack record of the assertion, accessed as assert_backtrack

Three shapes are produced:
  - no saved frame (framesize < 0): only the pending jumps are bound and, for
    the OP_BRAZERO form, the stack slot is zeroed before returning to the
    matching path when STR_PTR is non-zero;
  - OP_BRAZERO + negative assertion with a frame: same slot handling, no frame
    revert;
  - otherwise: positive assertions revert their frame through revertframes and
    restore the private data slot before the pending jumps are bound. */
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
assert_backtrack *assert_bt = CURRENT_AS(assert_backtrack);
PCRE2_SPTR cc = current->cc;
BOOL optional;
struct sljit_jump *zero_case = NULL;

SLJIT_ASSERT(*cc != OP_BRAMINZERO);
optional = (*cc == OP_BRAZERO);
if (optional)
  {
  /* Step over the prefix and reload the value kept on the stack. */
  cc++;
  SLJIT_ASSERT(current->topbacktracks == NULL);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

if (assert_bt->framesize < 0)
  {
  set_jumps(current->topbacktracks, LABEL());

  if (optional)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, assert_bt->matchingpath);
    free_stack(common, 1);
    }
  return;
  }

if (optional)
  {
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, assert_bt->matchingpath);
    free_stack(common, 1);
    return;
    }
  free_stack(common, 1);
  zero_case = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
  {
  /* Positive assertion: revert the saved frame and restore the slot. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert_bt->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert_bt->framesize - 1) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert_bt->private_data_ptr, TMP1, 0);
  }
set_jumps(current->topbacktracks, LABEL());

if (!optional)
  return;

/* We know there is enough place on the stack. */
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
JUMPTO(SLJIT_JUMP, assert_bt->matchingpath);
JUMPHERE(zero_case);
}
12392
/* Emits the backtracking path of a bracket-type group. Within this file it is
reached from compile_backtrackingpath for OP_ONCE, OP_SCRIPT_RUN, OP_BRA,
OP_CBRA, OP_COND, OP_SBRA, OP_SCBRA, OP_SCOND and for OP_BRAZERO when the
following opcode is above OP_ASSERTBACK_NOT, and from
compile_braminzero_backtrackingpath.

  common   shared compiler state
  current  backtrack record of the group, accessed as bracket_backtrack

Rough order of the generated code:
  1. restore the repeat counter (bounded repeats) and handle the
     OP_BRAZERO / OP_KETRMIN entry checks;
  2. restore capture data saved on the stack (capturing groups);
  3. OP_ONCE: revert the saved frame; conditionals / alternations: pop the
     alternative index and branch on it (compare chain up to 4 alternatives,
     jump table above that);
  4. backtracking code of the group contents, then for every remaining
     alternative its matching path followed by its backtracking code;
  5. restore the saved slot values and loop back / leave according to the ket
     and repeat type.

Returns early, leaving the code incomplete, when allocate_read_only_data
yields NULL or when a compiler error is reported after compile_matchingpath.
NOTE(review): COMPILE_BACKTRACKINGPATH is defined outside this view;
presumably it also returns on a compiler error - confirm. */
compile_bracket_backtrackingpath(compiler_common * common,struct backtrack_common * current)12393 static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12394 {
12395 DEFINE_COMPILER;
12396 int opcode, stacksize, alt_count, alt_max;
12397 int offset = 0;
12398 int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
12399 int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
12400 PCRE2_SPTR cc = current->cc;
12401 PCRE2_SPTR ccbegin;
12402 PCRE2_SPTR ccprev;
12403 PCRE2_UCHAR bra = OP_BRA;
12404 PCRE2_UCHAR ket;
12405 assert_backtrack *assert;
12406 sljit_uw *next_update_addr = NULL;
12407 BOOL has_alternatives;
12408 BOOL needs_control_head = FALSE;
12409 struct sljit_jump *brazero = NULL;
12410 struct sljit_jump *alt1 = NULL;
12411 struct sljit_jump *alt2 = NULL;
12412 struct sljit_jump *once = NULL;
12413 struct sljit_jump *cond = NULL;
12414 struct sljit_label *rmin_label = NULL;
12415 struct sljit_label *exact_label = NULL;
12416
/* Step over an optional OP_BRAZERO / OP_BRAMINZERO prefix, remembering it in bra. */
12417 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
12418 {
12419 bra = *cc;
12420 cc++;
12421 }
12422
/* Read the closing ket. A plain OP_KET with non-zero private data carries a
bounded repeat (slot, type, count); OP_UPTO and OP_MINUPTO are then handled
like OP_KETRMAX and OP_KETRMIN respectively. */
12423 opcode = *cc;
12424 ccbegin = bracketend(cc) - 1 - LINK_SIZE;
12425 ket = *ccbegin;
12426 if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
12427 {
12428 repeat_ptr = PRIVATE_DATA(ccbegin);
12429 repeat_type = PRIVATE_DATA(ccbegin + 2);
12430 repeat_count = PRIVATE_DATA(ccbegin + 3);
12431 SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
12432 if (repeat_type == OP_UPTO)
12433 ket = OP_KETRMAX;
12434 if (repeat_type == OP_MINUPTO)
12435 ket = OP_KETRMIN;
12436 }
/* ccbegin now marks the group opcode; cc is advanced by the link value stored
after it and then tested for OP_ALT. */
12437 ccbegin = cc;
12438 cc += GET(cc, 1);
12439 has_alternatives = *cc == OP_ALT;
12440 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12441 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
12442 if (opcode == OP_CBRA || opcode == OP_SCBRA)
12443 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
12444 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
12445 opcode = OP_SCOND;
12446
12447 alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
12448
12449 /* Decoding the needs_control_head in framesize. */
12450 if (opcode == OP_ONCE)
12451 {
12452 needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
12453 CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
12454 }
12455
/* Bounded repeat handled as a loop: pop the saved counter into TMP1 and write
it back to the repeat slot (incremented by one for OP_UPTO). */
12456 if (ket != OP_KET && repeat_type != 0)
12457 {
12458 /* TMP1 is used in OP_KETRMIN below. */
12459 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12460 free_stack(common, 1);
12461 if (repeat_type == OP_UPTO)
12462 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
12463 else
12464 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
12465 }
12466
12467 if (ket == OP_KETRMAX)
12468 {
12469 if (bra == OP_BRAZERO)
12470 {
12471 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12472 free_stack(common, 1);
12473 brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12474 }
12475 }
12476 else if (ket == OP_KETRMIN)
12477 {
12478 if (bra != OP_BRAMINZERO)
12479 {
12480 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12481 if (repeat_type != 0)
12482 {
12483 /* TMP1 was set a few lines above. */
12484 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12485 /* Drop STR_PTR for non-greedy plus quantifier. */
12486 if (opcode != OP_ONCE)
12487 free_stack(common, 1);
12488 }
12489 else if (opcode >= OP_SBRA || opcode == OP_ONCE)
12490 {
12491 /* Checking zero-length iteration. */
12492 if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
12493 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12494 else
12495 {
12496 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12497 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12498 }
12499 /* Drop STR_PTR for non-greedy plus quantifier. */
12500 if (opcode != OP_ONCE)
12501 free_stack(common, 1);
12502 }
12503 else
12504 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12505 }
/* Target of the loop-back jump emitted in the OP_KETRMIN tail at the end of the function. */
12506 rmin_label = LABEL();
12507 if (repeat_type != 0)
12508 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12509 }
12510 else if (bra == OP_BRAZERO)
12511 {
12512 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12513 free_stack(common, 1);
12514 brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12515 }
12516 else if (repeat_type == OP_EXACT)
12517 {
12518 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12519 exact_label = LABEL();
12520 }
12521
/* Capturing group: reload the values saved on the stack into the OVECTOR
slots (and into capture_last_ptr when that slot is in use). */
12522 if (offset != 0)
12523 {
12524 if (common->capture_last_ptr != 0)
12525 {
12526 SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
12527 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12528 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12529 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12530 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12531 free_stack(common, 3);
12532 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
12533 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
12534 }
12535 else if (common->optimized_cbracket[offset >> 1] == 0)
12536 {
12537 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12538 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12539 free_stack(common, 2);
12540 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12541 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12542 }
12543 }
12544
/* OP_ONCE: revert the saved frame (if any) and jump to the 'once' target bound
near the end of the function. Conditionals and alternations: pop the saved
alternative index into TMP1 and branch on it; the index is a multiple of
sizeof(sljit_uw). */
12545 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
12546 {
12547 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12548 {
12549 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12550 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12551 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
12552 }
12553 once = JUMP(SLJIT_JUMP);
12554 }
12555 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12556 {
12557 if (has_alternatives)
12558 {
12559 /* Always exactly one alternative. */
12560 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12561 free_stack(common, 1);
12562
12563 alt_max = 2;
12564 alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
12565 }
12566 }
12567 else if (has_alternatives)
12568 {
12569 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12570 free_stack(common, 1);
12571
12572 if (alt_max > 4)
12573 {
12574 /* Table jump if alt_max is greater than 4. */
12575 next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
12576 if (SLJIT_UNLIKELY(next_update_addr == NULL))
12577 return;
12578 sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
12579 add_label_addr(common, next_update_addr++);
12580 }
12581 else
12582 {
12583 if (alt_max == 4)
12584 alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
12585 alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
12586 }
12587 }
12588
/* Backtracking code of the items recorded under current->top, followed by the
landing point of this group's pending jumps. */
12589 COMPILE_BACKTRACKINGPATH(current->top);
12590 if (current->topbacktracks)
12591 set_jumps(current->topbacktracks, LABEL());
12592
12593 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12594 {
12595 /* Conditional block always has at most one alternative. */
12596 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
12597 {
12598 SLJIT_ASSERT(has_alternatives);
12599 assert = CURRENT_AS(bracket_backtrack)->u.assert;
12600 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
12601 {
12602 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
12603 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12604 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12605 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
12606 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
12607 }
12608 cond = JUMP(SLJIT_JUMP);
12609 set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
12610 }
12611 else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
12612 {
12613 SLJIT_ASSERT(has_alternatives);
12614 cond = JUMP(SLJIT_JUMP);
12615 set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
12616 }
12617 else
12618 SLJIT_ASSERT(!has_alternatives);
12619 }
12620
/* Remaining alternatives. Each pass compiles the next alternative's matching
path, pushes the bookkeeping values again with alt_count as the alternative
index, jumps to alternative_matchingpath, binds the dispatch target for that
index, and finally emits the alternative's own backtracking code. */
12621 if (has_alternatives)
12622 {
12623 alt_count = sizeof(sljit_uw);
12624 do
12625 {
12626 current->top = NULL;
12627 current->topbacktracks = NULL;
12628 current->nextbacktracks = NULL;
12629 /* Conditional blocks always have an additional alternative, even if it is empty. */
12630 if (*cc == OP_ALT)
12631 {
12632 ccprev = cc + 1 + LINK_SIZE;
12633 cc += GET(cc, 1);
12634 if (opcode != OP_COND && opcode != OP_SCOND)
12635 {
12636 if (opcode != OP_ONCE)
12637 {
12638 if (private_data_ptr != 0)
12639 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12640 else
12641 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12642 }
12643 else
12644 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
12645 }
12646 compile_matchingpath(common, ccprev, cc, current);
12647 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
12648 return;
12649
12650 if (opcode == OP_SCRIPT_RUN)
12651 match_script_run_common(common, private_data_ptr, current);
12652 }
12653
12654 /* Instructions after the current alternative is successfully matched. */
12655 /* There is a similar code in compile_bracket_matchingpath. */
12656 if (opcode == OP_ONCE)
12657 match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
12658
/* First pass: count the stack slots needed; second pass below: fill them. */
12659 stacksize = 0;
12660 if (repeat_type == OP_MINUPTO)
12661 {
12662 /* We need to preserve the counter. TMP2 will be used below. */
12663 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
12664 stacksize++;
12665 }
12666 if (ket != OP_KET || bra != OP_BRA)
12667 stacksize++;
12668 if (offset != 0)
12669 {
12670 if (common->capture_last_ptr != 0)
12671 stacksize++;
12672 if (common->optimized_cbracket[offset >> 1] == 0)
12673 stacksize += 2;
12674 }
12675 if (opcode != OP_ONCE)
12676 stacksize++;
12677
12678 if (stacksize > 0)
12679 allocate_stack(common, stacksize);
12680
12681 stacksize = 0;
12682 if (repeat_type == OP_MINUPTO)
12683 {
12684 /* TMP2 was set above. */
12685 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
12686 stacksize++;
12687 }
12688
12689 if (ket != OP_KET || bra != OP_BRA)
12690 {
12691 if (ket != OP_KET)
12692 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
12693 else
12694 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
12695 stacksize++;
12696 }
12697
12698 if (offset != 0)
12699 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
12700
12701 if (opcode != OP_ONCE)
12702 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
12703
12704 if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
12705 {
12706 /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
12707 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
12708 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
12709 }
12710
12711 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
12712
/* Bind the dispatch target for this alternative index: the next jump-table
entry when alt_max > 4, otherwise the pending alt1 / alt2 compare jump. */
12713 if (opcode != OP_ONCE)
12714 {
12715 if (alt_max > 4)
12716 add_label_addr(common, next_update_addr++);
12717 else
12718 {
12719 if (alt_count != 2 * sizeof(sljit_uw))
12720 {
12721 JUMPHERE(alt1);
12722 if (alt_max == 3 && alt_count == sizeof(sljit_uw))
12723 alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
12724 }
12725 else
12726 {
12727 JUMPHERE(alt2);
12728 if (alt_max == 4)
12729 alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
12730 }
12731 }
12732 alt_count += sizeof(sljit_uw);
12733 }
12734
12735 COMPILE_BACKTRACKINGPATH(current->top);
12736 if (current->topbacktracks)
12737 set_jumps(current->topbacktracks, LABEL());
12738 SLJIT_ASSERT(!current->nextbacktracks);
12739 }
12740 while (*cc == OP_ALT);
12741
12742 if (cond != NULL)
12743 {
12744 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
12745 assert = CURRENT_AS(bracket_backtrack)->u.assert;
12746 if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
12747 {
12748 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
12749 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12750 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12751 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
12752 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
12753 }
12754 JUMPHERE(cond);
12755 }
12756
12757 /* Free the STR_PTR. */
12758 if (private_data_ptr == 0)
12759 free_stack(common, 1);
12760 }
12761
/* Write the values still saved on the stack back to the OVECTOR slots or to
the private data slot, depending on the opcode; for OP_ONCE also release the
saved frame and bind the 'once' jump. */
12762 if (offset != 0)
12763 {
12764 /* Using both tmp register is better for instruction scheduling. */
12765 if (common->optimized_cbracket[offset >> 1] != 0)
12766 {
12767 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12768 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12769 free_stack(common, 2);
12770 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12771 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12772 }
12773 else
12774 {
12775 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12776 free_stack(common, 1);
12777 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12778 }
12779 }
12780 else if (opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
12781 {
12782 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
12783 free_stack(common, 1);
12784 }
12785 else if (opcode == OP_ONCE)
12786 {
12787 cc = ccbegin + GET(ccbegin, 1);
12788 stacksize = needs_control_head ? 1 : 0;
12789
12790 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12791 {
12792 /* Reset head and drop saved frame. */
12793 stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
12794 }
12795 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
12796 {
12797 /* The STR_PTR must be released. */
12798 stacksize++;
12799 }
12800
12801 if (stacksize > 0)
12802 free_stack(common, stacksize);
12803
12804 JUMPHERE(once);
12805 /* Restore previous private_data_ptr */
12806 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12807 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
12808 else if (ket == OP_KETRMIN)
12809 {
12810 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12811 /* See the comment below. */
12812 free_stack(common, 2);
12813 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12814 }
12815 }
12816
/* Final dispatch by repeat kind: OP_EXACT increments the counter and loops
back to exact_label while it is <= repeat_count; OP_KETRMAX returns to
recursive_matchingpath when the popped STR_PTR is non-zero; OP_KETRMIN loops
back to rmin_label when the popped value is non-zero; a plain OP_BRAZERO group
continues on zero_matchingpath. */
12817 if (repeat_type == OP_EXACT)
12818 {
12819 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12820 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
12821 CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
12822 }
12823 else if (ket == OP_KETRMAX)
12824 {
12825 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12826 if (bra != OP_BRAZERO)
12827 free_stack(common, 1);
12828
12829 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12830 if (bra == OP_BRAZERO)
12831 {
12832 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12833 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
12834 JUMPHERE(brazero);
12835 free_stack(common, 1);
12836 }
12837 }
12838 else if (ket == OP_KETRMIN)
12839 {
12840 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12841
12842 /* OP_ONCE removes everything in case of a backtrack, so we don't
12843 need to explicitly release the STR_PTR. The extra release would
12844 affect badly the free_stack(2) above. */
12845 if (opcode != OP_ONCE)
12846 free_stack(common, 1);
12847 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
12848 if (opcode == OP_ONCE)
12849 free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
12850 else if (bra == OP_BRAMINZERO)
12851 free_stack(common, 1);
12852 }
12853 else if (bra == OP_BRAZERO)
12854 {
12855 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12856 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
12857 JUMPHERE(brazero);
12858 }
12859 }
12860
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)12861 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12862 {
12863 DEFINE_COMPILER;
12864 int offset;
12865 struct sljit_jump *jump;
12866
12867 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
12868 {
12869 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
12870 {
12871 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
12872 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12873 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12874 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12875 if (common->capture_last_ptr != 0)
12876 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12877 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12878 if (common->capture_last_ptr != 0)
12879 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12880 }
12881 set_jumps(current->topbacktracks, LABEL());
12882 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
12883 return;
12884 }
12885
12886 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
12887 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12888 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
12889
12890 if (current->topbacktracks)
12891 {
12892 jump = JUMP(SLJIT_JUMP);
12893 set_jumps(current->topbacktracks, LABEL());
12894 /* Drop the stack frame. */
12895 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
12896 JUMPHERE(jump);
12897 }
12898 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
12899 }
12900
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)12901 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12902 {
12903 assert_backtrack backtrack;
12904
12905 current->top = NULL;
12906 current->topbacktracks = NULL;
12907 current->nextbacktracks = NULL;
12908 if (current->cc[1] > OP_ASSERTBACK_NOT)
12909 {
12910 /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
12911 compile_bracket_matchingpath(common, current->cc, current);
12912 compile_bracket_backtrackingpath(common, current->top);
12913 }
12914 else
12915 {
12916 memset(&backtrack, 0, sizeof(backtrack));
12917 backtrack.common.cc = current->cc;
12918 backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
12919 /* Manual call of compile_assert_matchingpath. */
12920 compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
12921 }
12922 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
12923 }
12924
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)12925 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12926 {
12927 DEFINE_COMPILER;
12928 PCRE2_UCHAR opcode = *current->cc;
12929 struct sljit_label *loop;
12930 struct sljit_jump *jump;
12931
12932 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
12933 {
12934 if (common->then_trap != NULL)
12935 {
12936 SLJIT_ASSERT(common->control_head_ptr != 0);
12937
12938 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12939 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
12940 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
12941 jump = JUMP(SLJIT_JUMP);
12942
12943 loop = LABEL();
12944 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12945 JUMPHERE(jump);
12946 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
12947 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
12948 add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
12949 return;
12950 }
12951 else if (!common->local_quit_available && common->in_positive_assertion)
12952 {
12953 add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
12954 return;
12955 }
12956 }
12957
12958 if (common->local_quit_available)
12959 {
12960 /* Abort match with a fail. */
12961 if (common->quit_label == NULL)
12962 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
12963 else
12964 JUMPTO(SLJIT_JUMP, common->quit_label);
12965 return;
12966 }
12967
12968 if (opcode == OP_SKIP_ARG)
12969 {
12970 SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
12971 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12972 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
12973 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
12974
12975 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
12976 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
12977 return;
12978 }
12979
12980 if (opcode == OP_SKIP)
12981 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12982 else
12983 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
12984 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
12985 }
12986
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)12987 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12988 {
12989 DEFINE_COMPILER;
12990 struct sljit_jump *jump;
12991 int size;
12992
12993 if (CURRENT_AS(then_trap_backtrack)->then_trap)
12994 {
12995 common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
12996 return;
12997 }
12998
12999 size = CURRENT_AS(then_trap_backtrack)->framesize;
13000 size = 3 + (size < 0 ? 0 : size);
13001
13002 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
13003 free_stack(common, size);
13004 jump = JUMP(SLJIT_JUMP);
13005
13006 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
13007 /* STACK_TOP is set by THEN. */
13008 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
13009 {
13010 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13011 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
13012 }
13013 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13014 free_stack(common, 3);
13015
13016 JUMPHERE(jump);
13017 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
13018 }
13019
/* Generates the backtracking code for a chain of backtrack records.

The chain is walked from 'current' through the prev links. For every record
the pending nextbacktracks jumps are bound to the current position, then the
code for the record's opcode is emitted, either inline or through the matching
compile_*_backtrackingpath helper. common->then_trap is saved on entry and
restored before returning. */
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *outer_then_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->nextbacktracks != NULL)
    set_jumps(current->nextbacktracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Restore the saved start-of-match value. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Single item iterators. */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF:
    case OP_REFI:
    case OP_DNREF:
    case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode after the prefix selects assertion or bracket handling. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    /* Restore the previous mark; with skip args five slots were saved and
    the control head is restored as well. */
    if (common->has_skip_arg)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN:
    case OP_THEN_ARG:
    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT:
    case OP_COMMIT_ARG:
    /* Without a local quit the return register is set to no-match first. */
    if (!common->local_quit_available)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
    if (common->quit_label != NULL)
      JUMPTO(SLJIT_JUMP, common->quit_label);
    else
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    break;

    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    set_jumps(current->topbacktracks, LABEL());
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = outer_then_trap;
}
13208
/* Generates the code of one recursive subpattern (common->currententry).

The group compiled is the bracket found at common->start + currententry->start.
Two fast-call entry points are emitted and recorded in the entry: entry_label,
which starts matching the group, and backtrack_label, which re-enters the group
on its backtracking path. Jumps already queued in entry_calls and
backtrack_calls are bound to these labels.

The emitted code leaves through sljit_emit_fast_return() with TMP1 set to 1 when
an alternative matched (or the accept path was taken) and to 0 on the other
exits.

Arguments:
  common    the compiler state; currententry selects the group

Returns:    nothing; returns early when compiling an alternative reports an
            sljit compiler error or when the jump table cannot be allocated
*/

static SLJIT_INLINE void compile_recurse(compiler_common *common)
{
DEFINE_COMPILER;
PCRE2_SPTR cc = common->start + common->currententry->start;
/* First opcode inside the bracket; capturing brackets carry an extra IMM2. */
PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
BOOL needs_control_head;
BOOL has_quit;
BOOL has_accept;
int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head, &has_quit, &has_accept);
int alt_count, alt_max, local_size;
backtrack_common altbacktrack;
jump_list *match = NULL;
sljit_uw *next_update_addr = NULL;
struct sljit_jump *alt1 = NULL;
struct sljit_jump *alt2 = NULL;
struct sljit_jump *accept_exit = NULL;
struct sljit_label *quit;

/* Recurse captures then. */
common->then_trap = NULL;

SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);

alt_max = no_alternatives(cc);
alt_count = 0;

/* Matching path. */
SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
common->currententry->entry_label = LABEL();
set_jumps(common->currententry->entry_calls, common->currententry->entry_label);

/* The return address of the fast call is received in TMP2. */
sljit_emit_fast_enter(compiler, TMP2, 0);
count_match(common);

/* One local slot for the return address, plus one for the saved STR_PTR when
the group has more than one alternative. */
local_size = (alt_max > 1) ? 2 : 1;

/* (Reversed) stack layout:
   [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */

allocate_stack(common, private_data_size + local_size);
/* Save return address. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);

copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, has_quit);

/* This variable is saved and restored all time when we enter or exit from a recursive context. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);

if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);

/* Remember the starting STR_PTR so that later alternatives can restart from it. */
if (alt_max > 1)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

/* Start with empty quit/accept lists and labels: jumps queued while compiling
this group are collected here and resolved after the alternatives loop. */
memset(&altbacktrack, 0, sizeof(backtrack_common));
common->quit_label = NULL;
common->accept_label = NULL;
common->quit = NULL;
common->accept = NULL;
altbacktrack.cc = ccbegin;
/* cc is used as the end of the alternative currently being compiled. */
cc += GET(cc, 1);
while (1)
  {
  altbacktrack.top = NULL;
  altbacktrack.topbacktracks = NULL;

  /* Every alternative after the first reloads STR_PTR from the STACK(0) slot. */
  if (altbacktrack.cc != ccbegin)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;

  /* The alternative matched: push the frame used by the common match exit.
  STACK(1) receives the alternative index (alt_count advances in steps of
  sizeof(sljit_uw)); STACK(0) is written at the match exit below. */
  allocate_stack(common, (alt_max > 1 || has_accept) ? 2 : 1);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);

  if (alt_max > 1 || has_accept)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);

  add_jump(compiler, &match, JUMP(SLJIT_JUMP));

  if (alt_count == 0)
    {
    /* Backtracking path entry. */
    SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
    common->currententry->backtrack_label = LABEL();
    set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);

    /* The return address of the backtracking call is received in TMP1. */
    sljit_emit_fast_enter(compiler, TMP1, 0);

    /* An index of alt_max * sizeof(sljit_sw) is not produced by any
    alternative inside the loop; NOTE(review): it presumably equals the
    alt_count value stored by the accept exit below - confirm. */
    if (has_accept)
      accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_max * sizeof (sljit_sw));

    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    /* Save return address. */
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);

    copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);

    if (alt_max > 1)
      {
      /* Load the index of the alternative that matched and dispatch to its
      backtracking code. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
      free_stack(common, 2);

      if (alt_max > 4)
        {
        /* Table jump if alt_max is greater than 4. */
        next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
        if (SLJIT_UNLIKELY(next_update_addr == NULL))
          return;
        sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
        add_label_addr(common, next_update_addr++);
        }
      else
        {
        /* At most four alternatives: a short chain of compares on TMP1.
        alt1 and alt2 are bound when the later alternatives are reached. */
        if (alt_max == 4)
          alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
        alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
        }
      }
    else
      free_stack(common, has_accept ? 2 : 1);
    }
  else if (alt_max > 4)
    /* Register the next jump table slot for this alternative. */
    add_label_addr(common, next_update_addr++);
  else
    {
    /* Bind the pending compare jump to this alternative's backtracking code
    and, where needed, emit the compare that selects a later alternative. */
    if (alt_count != 2 * sizeof(sljit_uw))
      {
      JUMPHERE(alt1);
      if (alt_max == 3 && alt_count == sizeof(sljit_uw))
        alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
      }
    else
      {
      JUMPHERE(alt2);
      if (alt_max == 4)
        alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
      }
    }

  alt_count += sizeof(sljit_uw);

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;
  set_jumps(altbacktrack.topbacktracks, LABEL());

  /* Stop after the last alternative. */
  if (*cc != OP_ALT)
    break;

  altbacktrack.cc = cc + 1 + LINK_SIZE;
  cc += GET(cc, 1);
  }

/* No alternative is matched. */

quit = LABEL();

copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, has_quit);

/* Reload the return address, release the frame and return with TMP1 == 0. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
free_stack(common, private_data_size + local_size);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
sljit_emit_fast_return(compiler, TMP2, 0);

if (common->quit != NULL)
  {
  SLJIT_ASSERT(has_quit);

  /* Jumps queued on the quit list land here: STACK_TOP is reset from
  recursive_head_ptr and control continues at the no-match exit above. */
  set_jumps(common->quit, LABEL());
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, has_quit);
  JUMPTO(SLJIT_JUMP, quit);
  }

if (has_accept)
  {
  /* Target of the accept_exit compare emitted at the backtracking entry. */
  JUMPHERE(accept_exit);
  free_stack(common, 2);

  /* Save return address. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);

  copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, has_quit);

  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
  free_stack(common, private_data_size + local_size);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  sljit_emit_fast_return(compiler, TMP2, 0);
  }

if (common->accept != NULL)
  {
  SLJIT_ASSERT(has_accept);

  /* Jumps queued on the accept list land here. A two-slot frame is pushed
  whose index slot holds the final alt_count, then control falls through to
  the common match exit. */
  set_jumps(common->accept, LABEL());

  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);

  allocate_stack(common, 2);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
  }

/* Common match exit: TMP2 holds the value loaded from recursive_head_ptr. */
set_jumps(match, LABEL());

OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);

/* Fetch the saved return address through TMP2 and return with TMP1 == 1. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
sljit_emit_fast_return(compiler, TMP2, 0);
}
13425
/* These helper macros are not used by the code that follows. */
#undef COMPILE_BACKTRACKINGPATH
#undef CURRENT_AS

/* Bits of the jit_compile() mode argument that configure code generation
instead of selecting a match mode. jit_compile() inspects them and then masks
them out of mode. */
#define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS \
  (PCRE2_JIT_INVALID_UTF)
13431
/* Compiles a pattern into machine code for one match mode.

The generated function, its size and its read-only data are stored in the
executable_functions descriptor attached to the pattern (re->executable_jit),
in the slot that belongs to the requested mode. The descriptor is created on
first use and reused by later calls.

Arguments:
  code      the compiled pattern
  mode      PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_SOFT or
              PCRE2_JIT_PARTIAL_HARD, optionally combined with the bits in
              PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS

Returns:    0 on success
            PCRE2_ERROR_INTERNAL if the newline convention is unknown
            PCRE2_ERROR_NOMEMORY on any other failure
*/

static int jit_compile(pcre2_code *code, sljit_u32 mode)
{
pcre2_real_code *re = (pcre2_real_code *)code;
struct sljit_compiler *compiler;
backtrack_common rootbacktrack;
compiler_common common_data;
compiler_common *common = &common_data;
const sljit_u8 *tables = re->tables;
/* The same allocator data is used for every allocation made here, including
the sljit compiler itself and the read-only data blocks. */
void *allocator_data = &re->memctl;
int private_data_size;
PCRE2_SPTR ccend;
executable_functions *functions;
void *executable_func;
sljit_uw executable_size;
sljit_uw total_length;
label_addr_list *label_addr;
struct sljit_label *mainloop_label = NULL;
struct sljit_label *continue_match_label;
struct sljit_label *empty_match_found_label = NULL;
struct sljit_label *empty_match_backtrack_label = NULL;
struct sljit_label *reset_match_label;
struct sljit_label *quit_label;
struct sljit_jump *jump;
struct sljit_jump *minlength_check_failed = NULL;
struct sljit_jump *reqbyte_notfound = NULL;
struct sljit_jump *empty_match = NULL;
struct sljit_jump *end_anchor_failed = NULL;

SLJIT_ASSERT(tables);

memset(&rootbacktrack, 0, sizeof(backtrack_common));
memset(common, 0, sizeof(compiler_common));
common->re = re;
/* The name table follows the pattern header; the byte code follows the table. */
common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;

#ifdef SUPPORT_UNICODE
common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
#endif /* SUPPORT_UNICODE */
/* From here on, mode only contains the match mode. */
mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;

common->start = rootbacktrack.cc;
common->read_only_data_head = NULL;
common->fcc = tables + fcc_offset;
common->lcc = (sljit_sw)(tables + lcc_offset);
common->mode = mode;
common->might_be_empty = re->minlength == 0;
common->nltype = NLTYPE_FIXED;
switch(re->newline_convention)
  {
  case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
  case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
  case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
  case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
  case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
  case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
  /* Nothing has been allocated yet, so a plain return is safe. */
  default: return PCRE2_ERROR_INTERNAL;
  }
common->nlmax = READ_CHAR_MAX;
common->nlmin = 0;
if (re->bsr_convention == PCRE2_BSR_UNICODE)
  common->bsr_nltype = NLTYPE_ANY;
else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
  common->bsr_nltype = NLTYPE_ANYCRLF;
else
  {
#ifdef BSR_ANYCRLF
  common->bsr_nltype = NLTYPE_ANYCRLF;
#else
  common->bsr_nltype = NLTYPE_ANY;
#endif
  }
common->bsr_nlmax = READ_CHAR_MAX;
common->bsr_nlmin = 0;
common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
common->ctypes = (sljit_sw)(tables + ctypes_offset);
common->name_count = re->name_count;
common->name_entry_size = re->name_entry_size;
common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
#ifdef SUPPORT_UNICODE
/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
common->utf = (re->overall_options & PCRE2_UTF) != 0;
common->use_ucp = (re->overall_options & PCRE2_UCP) != 0;
if (common->utf)
  {
  if (common->nltype == NLTYPE_ANY)
    common->nlmax = 0x2029;
  else if (common->nltype == NLTYPE_ANYCRLF)
    common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
  else
    {
    /* We only care about the first newline character. */
    common->nlmax = common->newline & 0xff;
    }

  if (common->nltype == NLTYPE_FIXED)
    common->nlmin = common->newline & 0xff;
  else
    common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;

  if (common->bsr_nltype == NLTYPE_ANY)
    common->bsr_nlmax = 0x2029;
  else
    common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
  common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
  }
else
  common->invalid_utf = FALSE;
#endif /* SUPPORT_UNICODE */
ccend = bracketend(common->start);

/* Calculate the local space size on the stack. */
common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
if (!common->optimized_cbracket)
  return PCRE2_ERROR_NOMEMORY;
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
memset(common->optimized_cbracket, 0, re->top_bracket + 1);
#else
memset(common->optimized_cbracket, 1, re->top_bracket + 1);
#endif

SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
common->capture_last_ptr = common->ovector_start;
common->ovector_start += sizeof(sljit_sw);
#endif
if (!check_opcode_types(common, common->start, ccend))
  {
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }

/* Checking flags and updating ovector_start. */
if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
  {
  common->req_char_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }
if (mode != PCRE2_JIT_COMPLETE)
  {
  common->start_used_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  if (mode == PCRE2_JIT_PARTIAL_SOFT)
    {
    common->hit_start = common->ovector_start;
    common->ovector_start += sizeof(sljit_sw);
    }
  }
if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
  {
  common->match_end_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
common->control_head_ptr = 1;
#endif
/* Up to here a non-zero control_head_ptr only means "needed"; it now becomes
a real offset. */
if (common->control_head_ptr != 0)
  {
  common->control_head_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }
if (common->has_set_som)
  {
  /* Saving the real start pointer is necessary. */
  common->start_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }

/* Aligning ovector to even number of sljit words. */
if ((common->ovector_start & sizeof(sljit_sw)) != 0)
  common->ovector_start += sizeof(sljit_sw);

if (common->start_ptr == 0)
  common->start_ptr = OVECTOR(0);

/* Capturing brackets cannot be optimized if callouts are allowed. */
if (common->capture_last_ptr != 0)
  memset(common->optimized_cbracket, 0, re->top_bracket + 1);

SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);

total_length = ccend - common->start;
/* One sljit_s32 per code unit, plus one extra byte per code unit for the
then_offsets array when the pattern contains (*THEN). */
common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
if (!common->private_data_ptrs)
  {
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }
memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));

private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
set_private_data_ptrs(common, &private_data_size, ccend);
if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
  {
  if (!detect_fast_forward_skip(common, &private_data_size) && !common->has_skip_in_assert_back)
    detect_fast_fail(common, common->start, &private_data_size, 4);
  }

SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr);

if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
  {
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }

if (common->has_then)
  {
  /* The then_offsets bytes live right after the private data pointers. */
  common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
  memset(common->then_offsets, 0, total_length);
  set_then_offsets(common, common->start, NULL);
  }

compiler = sljit_create_compiler(allocator_data);
if (!compiler)
  {
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }
common->compiler = compiler;

/* Main pcre_jit_exec entry. */
sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size);

/* Register init. */
reset_ovector(common, (re->top_bracket + 1) * 2);
if (common->req_char_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);

OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);

if (common->fast_fail_start_ptr < common->fast_fail_end_ptr)
  reset_fast_fail(common);

if (mode == PCRE2_JIT_PARTIAL_SOFT)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
if (common->control_head_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);

/* Main part of the matching */
if ((re->overall_options & PCRE2_ANCHORED) == 0)
  {
  mainloop_label = mainloop_entry(common);
  continue_match_label = LABEL();
  /* Forward search if possible. */
  if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
    {
    if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
      ;
    else if ((re->flags & PCRE2_FIRSTSET) != 0)
      fast_forward_first_char(common);
    else if ((re->flags & PCRE2_STARTLINE) != 0)
      fast_forward_newline(common);
    else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
      fast_forward_start_bits(common);
    }
  }
else
  continue_match_label = LABEL();

if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
  {
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
  minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
  }
if (common->req_char_ptr != 0)
  reqbyte_notfound = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);

/* Store the current STR_PTR in OVECTOR(0). */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
/* Copy the limit of allowed recursions. */
OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
if (common->capture_last_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
if (common->fast_forward_bc_ptr != NULL)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1), STR_PTR, 0);

if (common->start_ptr != OVECTOR(0))
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);

/* Copy the beginning of the string. */
if (mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  JUMPHERE(jump);
  }
else if (mode == PCRE2_JIT_PARTIAL_HARD)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

compile_matchingpath(common, common->start, ccend, &rootbacktrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  {
  /* The compiler object must not be dereferenced once it has been freed, so
  the read-only data is released with the local allocator_data. */
  sljit_free_compiler(compiler);
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }

if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
  end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);

if (common->might_be_empty)
  {
  empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
  empty_match_found_label = LABEL();
  }

common->accept_label = LABEL();
if (common->accept != NULL)
  set_jumps(common->accept, common->accept_label);

/* This means we have a match. Update the ovector. */
copy_ovector(common, re->top_bracket + 1);
common->quit_label = common->abort_label = LABEL();
if (common->quit != NULL)
  set_jumps(common->quit, common->quit_label);
if (common->abort != NULL)
  set_jumps(common->abort, common->abort_label);
if (minlength_check_failed != NULL)
  SET_LABEL(minlength_check_failed, common->abort_label);
sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);

if (common->failed_match != NULL)
  {
  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
  set_jumps(common->failed_match, LABEL());
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  JUMPTO(SLJIT_JUMP, common->abort_label);
  }

if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
  JUMPHERE(end_anchor_failed);

if (mode != PCRE2_JIT_COMPLETE)
  {
  common->partialmatchlabel = LABEL();
  set_jumps(common->partialmatch, common->partialmatchlabel);
  return_with_partial_match(common, common->quit_label);
  }

if (common->might_be_empty)
  empty_match_backtrack_label = LABEL();
compile_backtrackingpath(common, rootbacktrack.top);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  {
  sljit_free_compiler(compiler);
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
  /* compiler is already freed: use the local allocator_data. */
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }

SLJIT_ASSERT(rootbacktrack.prev == NULL);
reset_match_label = LABEL();

if (mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  /* Update hit_start only in the first time. */
  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
  JUMPHERE(jump);
  }

/* Check we have remaining characters. */
if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  }

OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
    (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1)) : common->start_ptr);

if ((re->overall_options & PCRE2_ANCHORED) == 0)
  {
  if (common->ff_newline_shortcut != NULL)
    {
    /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
    if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
      {
      if (common->match_end_ptr != 0)
        {
        OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
        OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
        CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
        OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
        }
      else
        CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
      }
    }
  else
    CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
  }

/* No more remaining characters. */
if (reqbyte_notfound != NULL)
  JUMPHERE(reqbyte_notfound);

if (mode == PCRE2_JIT_PARTIAL_SOFT)
  CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);

OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
JUMPTO(SLJIT_JUMP, common->quit_label);

flush_stubs(common);

if (common->might_be_empty)
  {
  /* An empty string was matched: honour PCRE2_NOTEMPTY and
  PCRE2_NOTEMPTY_ATSTART from the run-time options. */
  JUMPHERE(empty_match);
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
  JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
  JUMPTO(SLJIT_ZERO, empty_match_found_label);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
  JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
  }

common->fast_forward_bc_ptr = NULL;
common->fast_fail_start_ptr = 0;
common->fast_fail_end_ptr = 0;
common->currententry = common->entries;
common->local_quit_available = TRUE;
/* compile_recurse() overwrites common->quit_label; keep the main one. */
quit_label = common->quit_label;
while (common->currententry != NULL)
  {
  /* Might add new entries. */
  compile_recurse(common);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    sljit_free_compiler(compiler);
    SLJIT_FREE(common->optimized_cbracket, allocator_data);
    SLJIT_FREE(common->private_data_ptrs, allocator_data);
    /* compiler is already freed: use the local allocator_data. */
    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
    return PCRE2_ERROR_NOMEMORY;
    }
  flush_stubs(common);
  common->currententry = common->currententry->next;
  }
common->local_quit_available = FALSE;
common->quit_label = quit_label;

/* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
/* This is a (really) rare case. */
set_jumps(common->stackalloc, LABEL());
/* RETURN_ADDR is not a saved register. */
sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);

SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);

sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));

jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, TMP1, 0);

/* Allocation failed. */
JUMPHERE(jump);
/* We break the return address cache here, but this is a really rare case. */
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
JUMPTO(SLJIT_JUMP, common->quit_label);

/* Call limit reached. */
set_jumps(common->calllimit, LABEL());
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
JUMPTO(SLJIT_JUMP, common->quit_label);

/* Shared helper routines are emitted only when something jumps to them. */
if (common->revertframes != NULL)
  {
  set_jumps(common->revertframes, LABEL());
  do_revertframes(common);
  }
if (common->wordboundary != NULL)
  {
  set_jumps(common->wordboundary, LABEL());
  check_wordboundary(common);
  }
if (common->anynewline != NULL)
  {
  set_jumps(common->anynewline, LABEL());
  check_anynewline(common);
  }
if (common->hspace != NULL)
  {
  set_jumps(common->hspace, LABEL());
  check_hspace(common);
  }
if (common->vspace != NULL)
  {
  set_jumps(common->vspace, LABEL());
  check_vspace(common);
  }
if (common->casefulcmp != NULL)
  {
  set_jumps(common->casefulcmp, LABEL());
  do_casefulcmp(common);
  }
if (common->caselesscmp != NULL)
  {
  set_jumps(common->caselesscmp, LABEL());
  do_caselesscmp(common);
  }
if (common->reset_match != NULL)
  {
  set_jumps(common->reset_match, LABEL());
  do_reset_match(common, (re->top_bracket + 1) * 2);
  CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
  OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
  JUMPTO(SLJIT_JUMP, reset_match_label);
  }
#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utfreadchar != NULL)
  {
  set_jumps(common->utfreadchar, LABEL());
  do_utfreadchar(common);
  }
if (common->utfreadtype8 != NULL)
  {
  set_jumps(common->utfreadtype8, LABEL());
  do_utfreadtype8(common);
  }
if (common->utfpeakcharback != NULL)
  {
  set_jumps(common->utfpeakcharback, LABEL());
  do_utfpeakcharback(common);
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
if (common->utfreadchar_invalid != NULL)
  {
  set_jumps(common->utfreadchar_invalid, LABEL());
  do_utfreadchar_invalid(common);
  }
if (common->utfreadnewline_invalid != NULL)
  {
  set_jumps(common->utfreadnewline_invalid, LABEL());
  do_utfreadnewline_invalid(common);
  }
if (common->utfmoveback_invalid)
  {
  set_jumps(common->utfmoveback_invalid, LABEL());
  do_utfmoveback_invalid(common);
  }
if (common->utfpeakcharback_invalid)
  {
  set_jumps(common->utfpeakcharback_invalid, LABEL());
  do_utfpeakcharback_invalid(common);
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
if (common->getucd != NULL)
  {
  set_jumps(common->getucd, LABEL());
  do_getucd(common);
  }
if (common->getucdtype != NULL)
  {
  set_jumps(common->getucdtype, LABEL());
  do_getucdtype(common);
  }
#endif /* SUPPORT_UNICODE */

SLJIT_FREE(common->optimized_cbracket, allocator_data);
SLJIT_FREE(common->private_data_ptrs, allocator_data);

executable_func = sljit_generate_code(compiler);
executable_size = sljit_get_generated_code_size(compiler);
/* Fill the read-only jump tables with the final label addresses. */
label_addr = common->label_addrs;
while (label_addr != NULL)
  {
  *label_addr->update_addr = sljit_get_label_addr(label_addr->label);
  label_addr = label_addr->next;
  }
sljit_free_compiler(compiler);
/* compiler is a dangling pointer from here on; every cleanup below uses the
local allocator_data instead of compiler->allocator_data. */
if (executable_func == NULL)
  {
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }

/* Reuse the function descriptor if possible. */
if (re->executable_jit != NULL)
  functions = (executable_functions *)re->executable_jit;
else
  {
  functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
  if (functions == NULL)
    {
    /* This case is highly unlikely since we just recently
    freed a lot of memory. Not impossible though. */
    sljit_free_code(executable_func);
    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
    return PCRE2_ERROR_NOMEMORY;
    }
  memset(functions, 0, sizeof(executable_functions));
  functions->top_bracket = re->top_bracket + 1;
  functions->limit_match = re->limit_match;
  re->executable_jit = functions;
  }

/* Turn mode into an index. */
if (mode == PCRE2_JIT_COMPLETE)
  mode = 0;
else
  mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;

SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
functions->executable_funcs[mode] = executable_func;
functions->read_only_data_heads[mode] = common->read_only_data_head;
functions->executable_sizes[mode] = executable_size;
return 0;
}
14076
14077 #endif
14078
14079 /*************************************************
14080 * JIT compile a Regular Expression *
14081 *************************************************/
14082
/* This function uses JIT to convert a previously-compiled pattern into machine
code.

Arguments:
  code          a compiled pattern
  options       JIT option bits

Returns:        0: success or (*NOJIT) was used
               <0: an error code
*/
14093
/* The complete set of option bits that pcre2_jit_compile() accepts. Any bit
outside this mask makes that function return PCRE2_ERROR_JIT_BADOPTION. */

#define PUBLIC_JIT_COMPILE_OPTIONS \
  ( PCRE2_JIT_COMPLETE \
  | PCRE2_JIT_PARTIAL_SOFT \
  | PCRE2_JIT_PARTIAL_HARD \
  | PCRE2_JIT_INVALID_UTF )
14096
14097 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_compile(pcre2_code * code,uint32_t options)14098 pcre2_jit_compile(pcre2_code *code, uint32_t options)
14099 {
14100 #ifndef SUPPORT_JIT
14101
14102 (void)code;
14103 (void)options;
14104 return PCRE2_ERROR_JIT_BADOPTION;
14105
14106 #else /* SUPPORT_JIT */
14107
14108 pcre2_real_code *re = (pcre2_real_code *)code;
14109 executable_functions *functions;
14110 uint32_t excluded_options;
14111 int result;
14112
14113 if (code == NULL)
14114 return PCRE2_ERROR_NULL;
14115
14116 if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14117 return PCRE2_ERROR_JIT_BADOPTION;
14118
14119 if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14120
14121 functions = (executable_functions *)re->executable_jit;
14122
14123 if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14124 || functions->executable_funcs[0] == NULL)) {
14125 excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14126 result = jit_compile(code, options & ~excluded_options);
14127 if (result != 0)
14128 return result;
14129 }
14130
14131 if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14132 || functions->executable_funcs[1] == NULL)) {
14133 excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14134 result = jit_compile(code, options & ~excluded_options);
14135 if (result != 0)
14136 return result;
14137 }
14138
14139 if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14140 || functions->executable_funcs[2] == NULL)) {
14141 excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14142 result = jit_compile(code, options & ~excluded_options);
14143 if (result != 0)
14144 return result;
14145 }
14146
14147 return 0;
14148
14149 #endif /* SUPPORT_JIT */
14150 }
14151
/* The JIT compiler uses an all-in-one approach: the JIT match and
miscellaneous sources are included into this file below. This improves
security, since the code generator functions are not exported. */
14154
14155 #define INCLUDED_FROM_PCRE2_JIT_COMPILE
14156
14157 #include "pcre2_jit_match.c"
14158 #include "pcre2_jit_misc.c"
14159
14160 /* End of pcre2_jit_compile.c */
14161