1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Original API code Copyright (c) 1997-2012 University of Cambridge
10 New API code Copyright (c) 2016-2020 University of Cambridge
11
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40
41 /* This module contains functions that scan a compiled pattern and change
42 repeats into possessive repeats where possible. */
43
44
45 #ifdef HAVE_CONFIG_H
46 #include "config.h"
47 #endif
48
49
50 #include "pcre2_internal.h"
51
52
53 /*************************************************
54 * Tables for auto-possessification *
55 *************************************************/
56
/* This table is used to check whether auto-possessification is possible
between adjacent character-type opcodes. The left-hand (repeated) opcode is
used to select the row, and the right-hand opcode is used to select the column.
A value of 1 means that auto-possessification is OK. For example, the second
value in the first row means that \D+\d can be turned into \D++\d.

The Unicode property types (\P and \p) have to be present to fill out the table
because of what their opcode values are, but the table values should always be
zero because property types are handled separately in the code. The last four
columns apply to items that cannot be repeated, so there is no need to have
rows for them. Note that OP_DIGIT etc. are generated only when PCRE2_UCP is
*not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
69
/* Dimensions of autoposstab. Rows cover the opcodes that may appear on the
left (repeated) side, FIRST_AUTOTAB_OP to LAST_AUTOTAB_LEFT_OP inclusive.
Columns cover the opcodes that may appear on the right-hand side,
FIRST_AUTOTAB_OP to LAST_AUTOTAB_RIGHT_OP inclusive. */

#define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1)
#define APTCOLS (LAST_AUTOTAB_RIGHT_OP - FIRST_AUTOTAB_OP + 1)

/* Indexed as autoposstab[left - FIRST_AUTOTAB_OP][right - FIRST_AUTOTAB_OP].
A non-zero entry means the repeated left-hand item may be made possessive when
it is followed by the right-hand item. */

static const uint8_t autoposstab[APTROWS][APTCOLS] = {
/* \D \d \S \s \W \w  . .+ \C \P \p \R \H \h \V \v \X \Z \z  $ $M */
  { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \D */
  { 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \d */
  { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \S */
  { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \s */
  { 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \W */
  { 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \w */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* .  */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* .+ */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \C */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },  /* \P */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },  /* \p */
  { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 },  /* \R */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 },  /* \H */
  { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0 },  /* \h */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0 },  /* \V */
  { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0 },  /* \v */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }   /* \X */
};
93
94 #ifdef SUPPORT_UNICODE
95 /* This table is used to check whether auto-possessification is possible
96 between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP). The
97 left-hand (repeated) opcode is used to select the row, and the right-hand
98 opcode is used to select the column. The values are as follows:
99
100 0 Always return FALSE (never auto-possessify)
101 1 Character groups are distinct (possessify if both are OP_PROP)
102 2 Check character categories in the same group (general or particular)
103 3 TRUE if the two opcodes are not the same (PROP vs NOTPROP)
104
105 4 Check left general category vs right particular category
106 5 Check right general category vs left particular category
107
108 6 Left alphanum vs right general category
109 7 Left space vs right general category
110 8 Left word vs right general category
111
112 9 Right alphanum vs left general category
113 10 Right space vs left general category
114 11 Right word vs left general category
115
116 12 Left alphanum vs right particular category
117 13 Left space vs right particular category
118 14 Left word vs right particular category
119
120 15 Right alphanum vs left particular category
121 16 Right space vs left particular category
122 17 Right word vs left particular category
123 */
124
/* Indexed as propposstab[left property type][right property type], where the
property types are the PT_xxx values named in the row and column labels. Each
entry is an action code that compare_opcodes() switches on. */

static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = {
/* ANY LAMP GC  PC  SC ALNUM SPACE PXSPACE WORD CLIST UCNC */
  { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   0 },  /* PT_ANY */
  { 0,  3,  0,  0,  0,    3,    1,      1,   0,    0,   0 },  /* PT_LAMP */
  { 0,  0,  2,  4,  0,    9,   10,     10,  11,    0,   0 },  /* PT_GC */
  { 0,  0,  5,  2,  0,   15,   16,     16,  17,    0,   0 },  /* PT_PC */
  { 0,  0,  0,  0,  2,    0,    0,      0,   0,    0,   0 },  /* PT_SC */
  { 0,  3,  6, 12,  0,    3,    1,      1,   0,    0,   0 },  /* PT_ALNUM */
  { 0,  1,  7, 13,  0,    1,    3,      3,   1,    0,   0 },  /* PT_SPACE */
  { 0,  1,  7, 13,  0,    1,    3,      3,   1,    0,   0 },  /* PT_PXSPACE */
  { 0,  0,  8, 14,  0,    0,    1,      1,   3,    0,   0 },  /* PT_WORD */
  { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   0 },  /* PT_CLIST */
  { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   3 }   /* PT_UCNC */
};
139
140 /* This table is used to check whether auto-possessification is possible
141 between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP) when one
142 specifies a general category and the other specifies a particular category. The
143 row is selected by the general category and the column by the particular
144 category. The value is 1 if the particular category is not part of the general
145 category. */
146
/* Indexed as catposstab[general category][particular category]: one row for
each of the 7 general categories and one column for each of the 30 particular
categories, in the order given by the row and column labels. */

static const uint8_t catposstab[7][30] = {
/* Cc Cf Cn Co Cs Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc Pd Pe Pf Pi Po Ps Sc Sk Sm So Zl Zp Zs */
  { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* C */
  { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* L */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* M */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* N */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },  /* P */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1 },  /* S */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 }   /* Z */
};
157
158 /* This table is used when checking ALNUM, (PX)SPACE, SPACE, and WORD against
159 a general or particular category. The properties in each row are those
160 that apply to the character set in question. Duplication means that a little
161 unnecessary work is done when checking, but this keeps things much simpler
162 because they can all use the same code. For more details see the comment where
163 this table is used.
164
165 Note: SPACE and PXSPACE used to be different because Perl excluded VT from
166 "space", but from Perl 5.18 it's included, so both categories are treated the
167 same here. */
168
/* Row 0 is ALNUM, row 1 is SPACE/PXSPACE and row 2 is WORD. In each row the
first three entries are general categories and the fourth is a particular
category. compare_opcodes() picks the row by subtracting the first case number
of the relevant group (6, 9, 12 or 15) from the propposstab action code. */

static const uint8_t posspropstab[3][4] = {
  { ucp_L, ucp_N, ucp_N, ucp_Nl },  /* ALNUM, 3rd and 4th values redundant */
  { ucp_Z, ucp_Z, ucp_C, ucp_Cc },  /* SPACE and PXSPACE, 2nd value redundant */
  { ucp_L, ucp_N, ucp_P, ucp_Po }   /* WORD */
};
174 #endif /* SUPPORT_UNICODE */
175
176
177
178 #ifdef SUPPORT_UNICODE
179 /*************************************************
180 * Check a character and a property *
181 *************************************************/
182
183 /* This function is called by compare_opcodes() when a property item is
184 adjacent to a fixed character.
185
186 Arguments:
187 c the character
188 ptype the property type
189 pdata the data for the type
190 negated TRUE if it's a negated property (\P or \p{^)
191
192 Returns: TRUE if auto-possessifying is OK
193 */
194
195 static BOOL
check_char_prop(uint32_t c,unsigned int ptype,unsigned int pdata,BOOL negated)196 check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata,
197 BOOL negated)
198 {
199 const uint32_t *p;
200 const ucd_record *prop = GET_UCD(c);
201
202 switch(ptype)
203 {
204 case PT_LAMP:
205 return (prop->chartype == ucp_Lu ||
206 prop->chartype == ucp_Ll ||
207 prop->chartype == ucp_Lt) == negated;
208
209 case PT_GC:
210 return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
211
212 case PT_PC:
213 return (pdata == prop->chartype) == negated;
214
215 case PT_SC:
216 return (pdata == prop->script) == negated;
217
218 /* These are specials */
219
220 case PT_ALNUM:
221 return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
222 PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
223
224 /* Perl space used to exclude VT, but from Perl 5.18 it is included, which
225 means that Perl space and POSIX space are now identical. PCRE was changed
226 at release 8.34. */
227
228 case PT_SPACE: /* Perl space */
229 case PT_PXSPACE: /* POSIX space */
230 switch(c)
231 {
232 HSPACE_CASES:
233 VSPACE_CASES:
234 return negated;
235
236 default:
237 return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;
238 }
239 break; /* Control never reaches here */
240
241 case PT_WORD:
242 return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
243 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
244 c == CHAR_UNDERSCORE) == negated;
245
246 case PT_CLIST:
247 p = PRIV(ucd_caseless_sets) + prop->caseset;
248 for (;;)
249 {
250 if (c < *p) return !negated;
251 if (c == *p++) return negated;
252 }
253 break; /* Control never reaches here */
254 }
255
256 return FALSE;
257 }
258 #endif /* SUPPORT_UNICODE */
259
260
261
262 /*************************************************
263 * Base opcode of repeated opcodes *
264 *************************************************/
265
266 /* Returns the base opcode for repeated single character type opcodes. If the
267 opcode is not a repeated character type, it returns with the original value.
268
269 Arguments: c opcode
270 Returns: base opcode for the type
271 */
272
273 static PCRE2_UCHAR
get_repeat_base(PCRE2_UCHAR c)274 get_repeat_base(PCRE2_UCHAR c)
275 {
276 return (c > OP_TYPEPOSUPTO)? c :
277 (c >= OP_TYPESTAR)? OP_TYPESTAR :
278 (c >= OP_NOTSTARI)? OP_NOTSTARI :
279 (c >= OP_NOTSTAR)? OP_NOTSTAR :
280 (c >= OP_STARI)? OP_STARI :
281 OP_STAR;
282 }
283
284
285 /*************************************************
286 * Fill the character property list *
287 *************************************************/
288
289 /* Checks whether the code points to an opcode that can take part in auto-
290 possessification, and if so, fills a list with its properties.
291
292 Arguments:
293 code points to start of expression
294 utf TRUE if in UTF mode
295 ucp TRUE if in UCP mode
296 fcc points to the case-flipping table
297 list points to output list
298 list[0] will be filled with the opcode
299 list[1] will be non-zero if this opcode
300 can match an empty character string
301 list[2..7] depends on the opcode
302
303 Returns: points to the start of the next opcode if *code is accepted
304 NULL if *code is not accepted
305 */
306
307 static PCRE2_SPTR
get_chr_property_list(PCRE2_SPTR code,BOOL utf,BOOL ucp,const uint8_t * fcc,uint32_t * list)308 get_chr_property_list(PCRE2_SPTR code, BOOL utf, BOOL ucp, const uint8_t *fcc,
309 uint32_t *list)
310 {
311 PCRE2_UCHAR c = *code;
312 PCRE2_UCHAR base;
313 PCRE2_SPTR end;
314 uint32_t chr;
315
316 #ifdef SUPPORT_UNICODE
317 uint32_t *clist_dest;
318 const uint32_t *clist_src;
319 #else
320 (void)utf; /* Suppress "unused parameter" compiler warnings */
321 (void)ucp;
322 #endif
323
324 list[0] = c;
325 list[1] = FALSE;
326 code++;
327
328 if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
329 {
330 base = get_repeat_base(c);
331 c -= (base - OP_STAR);
332
333 if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
334 code += IMM2_SIZE;
335
336 list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
337 c != OP_POSPLUS);
338
339 switch(base)
340 {
341 case OP_STAR:
342 list[0] = OP_CHAR;
343 break;
344
345 case OP_STARI:
346 list[0] = OP_CHARI;
347 break;
348
349 case OP_NOTSTAR:
350 list[0] = OP_NOT;
351 break;
352
353 case OP_NOTSTARI:
354 list[0] = OP_NOTI;
355 break;
356
357 case OP_TYPESTAR:
358 list[0] = *code;
359 code++;
360 break;
361 }
362 c = list[0];
363 }
364
365 switch(c)
366 {
367 case OP_NOT_DIGIT:
368 case OP_DIGIT:
369 case OP_NOT_WHITESPACE:
370 case OP_WHITESPACE:
371 case OP_NOT_WORDCHAR:
372 case OP_WORDCHAR:
373 case OP_ANY:
374 case OP_ALLANY:
375 case OP_ANYNL:
376 case OP_NOT_HSPACE:
377 case OP_HSPACE:
378 case OP_NOT_VSPACE:
379 case OP_VSPACE:
380 case OP_EXTUNI:
381 case OP_EODN:
382 case OP_EOD:
383 case OP_DOLL:
384 case OP_DOLLM:
385 return code;
386
387 case OP_CHAR:
388 case OP_NOT:
389 GETCHARINCTEST(chr, code);
390 list[2] = chr;
391 list[3] = NOTACHAR;
392 return code;
393
394 case OP_CHARI:
395 case OP_NOTI:
396 list[0] = (c == OP_CHARI) ? OP_CHAR : OP_NOT;
397 GETCHARINCTEST(chr, code);
398 list[2] = chr;
399
400 #ifdef SUPPORT_UNICODE
401 if (chr < 128 || (chr < 256 && !utf && !ucp))
402 list[3] = fcc[chr];
403 else
404 list[3] = UCD_OTHERCASE(chr);
405 #elif defined SUPPORT_WIDE_CHARS
406 list[3] = (chr < 256) ? fcc[chr] : chr;
407 #else
408 list[3] = fcc[chr];
409 #endif
410
411 /* The othercase might be the same value. */
412
413 if (chr == list[3])
414 list[3] = NOTACHAR;
415 else
416 list[4] = NOTACHAR;
417 return code;
418
419 #ifdef SUPPORT_UNICODE
420 case OP_PROP:
421 case OP_NOTPROP:
422 if (code[0] != PT_CLIST)
423 {
424 list[2] = code[0];
425 list[3] = code[1];
426 return code + 2;
427 }
428
429 /* Convert only if we have enough space. */
430
431 clist_src = PRIV(ucd_caseless_sets) + code[1];
432 clist_dest = list + 2;
433 code += 2;
434
435 do {
436 if (clist_dest >= list + 8)
437 {
438 /* Early return if there is not enough space. This should never
439 happen, since all clists are shorter than 5 character now. */
440 list[2] = code[0];
441 list[3] = code[1];
442 return code;
443 }
444 *clist_dest++ = *clist_src;
445 }
446 while(*clist_src++ != NOTACHAR);
447
448 /* All characters are stored. The terminating NOTACHAR is copied from the
449 clist itself. */
450
451 list[0] = (c == OP_PROP) ? OP_CHAR : OP_NOT;
452 return code;
453 #endif
454
455 case OP_NCLASS:
456 case OP_CLASS:
457 #ifdef SUPPORT_WIDE_CHARS
458 case OP_XCLASS:
459 if (c == OP_XCLASS)
460 end = code + GET(code, 0) - 1;
461 else
462 #endif
463 end = code + 32 / sizeof(PCRE2_UCHAR);
464
465 switch(*end)
466 {
467 case OP_CRSTAR:
468 case OP_CRMINSTAR:
469 case OP_CRQUERY:
470 case OP_CRMINQUERY:
471 case OP_CRPOSSTAR:
472 case OP_CRPOSQUERY:
473 list[1] = TRUE;
474 end++;
475 break;
476
477 case OP_CRPLUS:
478 case OP_CRMINPLUS:
479 case OP_CRPOSPLUS:
480 end++;
481 break;
482
483 case OP_CRRANGE:
484 case OP_CRMINRANGE:
485 case OP_CRPOSRANGE:
486 list[1] = (GET2(end, 1) == 0);
487 end += 1 + 2 * IMM2_SIZE;
488 break;
489 }
490 list[2] = (uint32_t)(end - code);
491 return end;
492 }
493 return NULL; /* Opcode not accepted */
494 }
495
496
497
498 /*************************************************
499 * Scan further character sets for match *
500 *************************************************/
501
502 /* Checks whether the base and the current opcode have a common character, in
503 which case the base cannot be possessified.
504
505 Arguments:
506 code points to the byte code
507 utf TRUE in UTF mode
508 ucp TRUE in UCP mode
509 cb compile data block
510 base_list the data list of the base opcode
511 base_end the end of the base opcode
512 rec_limit points to recursion depth counter
513
514 Returns: TRUE if the auto-possessification is possible
515 */
516
517 static BOOL
compare_opcodes(PCRE2_SPTR code,BOOL utf,BOOL ucp,const compile_block * cb,const uint32_t * base_list,PCRE2_SPTR base_end,int * rec_limit)518 compare_opcodes(PCRE2_SPTR code, BOOL utf, BOOL ucp, const compile_block *cb,
519 const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit)
520 {
521 PCRE2_UCHAR c;
522 uint32_t list[8];
523 const uint32_t *chr_ptr;
524 const uint32_t *ochr_ptr;
525 const uint32_t *list_ptr;
526 PCRE2_SPTR next_code;
527 #ifdef SUPPORT_WIDE_CHARS
528 PCRE2_SPTR xclass_flags;
529 #endif
530 const uint8_t *class_bitset;
531 const uint8_t *set1, *set2, *set_end;
532 uint32_t chr;
533 BOOL accepted, invert_bits;
534 BOOL entered_a_group = FALSE;
535
536 if (--(*rec_limit) <= 0) return FALSE; /* Recursion has gone too deep */
537
538 /* Note: the base_list[1] contains whether the current opcode has a greedy
539 (represented by a non-zero value) quantifier. This is a different from
540 other character type lists, which store here that the character iterator
541 matches to an empty string (also represented by a non-zero value). */
542
543 for(;;)
544 {
545 /* All operations move the code pointer forward.
546 Therefore infinite recursions are not possible. */
547
548 c = *code;
549
550 /* Skip over callouts */
551
552 if (c == OP_CALLOUT)
553 {
554 code += PRIV(OP_lengths)[c];
555 continue;
556 }
557
558 if (c == OP_CALLOUT_STR)
559 {
560 code += GET(code, 1 + 2*LINK_SIZE);
561 continue;
562 }
563
564 /* At the end of a branch, skip to the end of the group. */
565
566 if (c == OP_ALT)
567 {
568 do code += GET(code, 1); while (*code == OP_ALT);
569 c = *code;
570 }
571
572 /* Inspect the next opcode. */
573
574 switch(c)
575 {
576 /* We can always possessify a greedy iterator at the end of the pattern,
577 which is reached after skipping over the final OP_KET. A non-greedy
578 iterator must never be possessified. */
579
580 case OP_END:
581 return base_list[1] != 0;
582
583 /* When an iterator is at the end of certain kinds of group we can inspect
584 what follows the group by skipping over the closing ket. Note that this
585 does not apply to OP_KETRMAX or OP_KETRMIN because what follows any given
586 iteration is variable (could be another iteration or could be the next
587 item). As these two opcodes are not listed in the next switch, they will
588 end up as the next code to inspect, and return FALSE by virtue of being
589 unsupported. */
590
591 case OP_KET:
592 case OP_KETRPOS:
593 /* The non-greedy case cannot be converted to a possessive form. */
594
595 if (base_list[1] == 0) return FALSE;
596
597 /* If the bracket is capturing it might be referenced by an OP_RECURSE
598 so its last iterator can never be possessified if the pattern contains
599 recursions. (This could be improved by keeping a list of group numbers that
600 are called by recursion.) */
601
602 switch(*(code - GET(code, 1)))
603 {
604 case OP_CBRA:
605 case OP_SCBRA:
606 case OP_CBRAPOS:
607 case OP_SCBRAPOS:
608 if (cb->had_recurse) return FALSE;
609 break;
610
611 /* A script run might have to backtrack if the iterated item can match
612 characters from more than one script. So give up unless repeating an
613 explicit character. */
614
615 case OP_SCRIPT_RUN:
616 if (base_list[0] != OP_CHAR && base_list[0] != OP_CHARI)
617 return FALSE;
618 break;
619
620 /* Atomic sub-patterns and assertions can always auto-possessify their
621 last iterator. However, if the group was entered as a result of checking
622 a previous iterator, this is not possible. */
623
624 case OP_ASSERT:
625 case OP_ASSERT_NOT:
626 case OP_ASSERTBACK:
627 case OP_ASSERTBACK_NOT:
628 case OP_ONCE:
629 return !entered_a_group;
630
631 /* Non-atomic assertions - don't possessify last iterator. This needs
632 more thought. */
633
634 case OP_ASSERT_NA:
635 case OP_ASSERTBACK_NA:
636 return FALSE;
637 }
638
639 /* Skip over the bracket and inspect what comes next. */
640
641 code += PRIV(OP_lengths)[c];
642 continue;
643
644 /* Handle cases where the next item is a group. */
645
646 case OP_ONCE:
647 case OP_BRA:
648 case OP_CBRA:
649 next_code = code + GET(code, 1);
650 code += PRIV(OP_lengths)[c];
651
652 /* Check each branch. We have to recurse a level for all but the last
653 branch. */
654
655 while (*next_code == OP_ALT)
656 {
657 if (!compare_opcodes(code, utf, ucp, cb, base_list, base_end, rec_limit))
658 return FALSE;
659 code = next_code + 1 + LINK_SIZE;
660 next_code += GET(next_code, 1);
661 }
662
663 entered_a_group = TRUE;
664 continue;
665
666 case OP_BRAZERO:
667 case OP_BRAMINZERO:
668
669 next_code = code + 1;
670 if (*next_code != OP_BRA && *next_code != OP_CBRA &&
671 *next_code != OP_ONCE) return FALSE;
672
673 do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
674
675 /* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
676
677 next_code += 1 + LINK_SIZE;
678 if (!compare_opcodes(next_code, utf, ucp, cb, base_list, base_end,
679 rec_limit))
680 return FALSE;
681
682 code += PRIV(OP_lengths)[c];
683 continue;
684
685 /* The next opcode does not need special handling; fall through and use it
686 to see if the base can be possessified. */
687
688 default:
689 break;
690 }
691
692 /* We now have the next appropriate opcode to compare with the base. Check
693 for a supported opcode, and load its properties. */
694
695 code = get_chr_property_list(code, utf, ucp, cb->fcc, list);
696 if (code == NULL) return FALSE; /* Unsupported */
697
698 /* If either opcode is a small character list, set pointers for comparing
699 characters from that list with another list, or with a property. */
700
701 if (base_list[0] == OP_CHAR)
702 {
703 chr_ptr = base_list + 2;
704 list_ptr = list;
705 }
706 else if (list[0] == OP_CHAR)
707 {
708 chr_ptr = list + 2;
709 list_ptr = base_list;
710 }
711
712 /* Character bitsets can also be compared to certain opcodes. */
713
714 else if (base_list[0] == OP_CLASS || list[0] == OP_CLASS
715 #if PCRE2_CODE_UNIT_WIDTH == 8
716 /* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. */
717 || (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS))
718 #endif
719 )
720 {
721 #if PCRE2_CODE_UNIT_WIDTH == 8
722 if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS))
723 #else
724 if (base_list[0] == OP_CLASS)
725 #endif
726 {
727 set1 = (uint8_t *)(base_end - base_list[2]);
728 list_ptr = list;
729 }
730 else
731 {
732 set1 = (uint8_t *)(code - list[2]);
733 list_ptr = base_list;
734 }
735
736 invert_bits = FALSE;
737 switch(list_ptr[0])
738 {
739 case OP_CLASS:
740 case OP_NCLASS:
741 set2 = (uint8_t *)
742 ((list_ptr == list ? code : base_end) - list_ptr[2]);
743 break;
744
745 #ifdef SUPPORT_WIDE_CHARS
746 case OP_XCLASS:
747 xclass_flags = (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE;
748 if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE;
749 if ((*xclass_flags & XCL_MAP) == 0)
750 {
751 /* No bits are set for characters < 256. */
752 if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0;
753 /* Might be an empty repeat. */
754 continue;
755 }
756 set2 = (uint8_t *)(xclass_flags + 1);
757 break;
758 #endif
759
760 case OP_NOT_DIGIT:
761 invert_bits = TRUE;
762 /* Fall through */
763 case OP_DIGIT:
764 set2 = (uint8_t *)(cb->cbits + cbit_digit);
765 break;
766
767 case OP_NOT_WHITESPACE:
768 invert_bits = TRUE;
769 /* Fall through */
770 case OP_WHITESPACE:
771 set2 = (uint8_t *)(cb->cbits + cbit_space);
772 break;
773
774 case OP_NOT_WORDCHAR:
775 invert_bits = TRUE;
776 /* Fall through */
777 case OP_WORDCHAR:
778 set2 = (uint8_t *)(cb->cbits + cbit_word);
779 break;
780
781 default:
782 return FALSE;
783 }
784
785 /* Because the bit sets are unaligned bytes, we need to perform byte
786 comparison here. */
787
788 set_end = set1 + 32;
789 if (invert_bits)
790 {
791 do
792 {
793 if ((*set1++ & ~(*set2++)) != 0) return FALSE;
794 }
795 while (set1 < set_end);
796 }
797 else
798 {
799 do
800 {
801 if ((*set1++ & *set2++) != 0) return FALSE;
802 }
803 while (set1 < set_end);
804 }
805
806 if (list[1] == 0) return TRUE;
807 /* Might be an empty repeat. */
808 continue;
809 }
810
811 /* Some property combinations also acceptable. Unicode property opcodes are
812 processed specially; the rest can be handled with a lookup table. */
813
814 else
815 {
816 uint32_t leftop, rightop;
817
818 leftop = base_list[0];
819 rightop = list[0];
820
821 #ifdef SUPPORT_UNICODE
822 accepted = FALSE; /* Always set in non-unicode case. */
823 if (leftop == OP_PROP || leftop == OP_NOTPROP)
824 {
825 if (rightop == OP_EOD)
826 accepted = TRUE;
827 else if (rightop == OP_PROP || rightop == OP_NOTPROP)
828 {
829 int n;
830 const uint8_t *p;
831 BOOL same = leftop == rightop;
832 BOOL lisprop = leftop == OP_PROP;
833 BOOL risprop = rightop == OP_PROP;
834 BOOL bothprop = lisprop && risprop;
835
836 /* There's a table that specifies how each combination is to be
837 processed:
838 0 Always return FALSE (never auto-possessify)
839 1 Character groups are distinct (possessify if both are OP_PROP)
840 2 Check character categories in the same group (general or particular)
841 3 Return TRUE if the two opcodes are not the same
842 ... see comments below
843 */
844
845 n = propposstab[base_list[2]][list[2]];
846 switch(n)
847 {
848 case 0: break;
849 case 1: accepted = bothprop; break;
850 case 2: accepted = (base_list[3] == list[3]) != same; break;
851 case 3: accepted = !same; break;
852
853 case 4: /* Left general category, right particular category */
854 accepted = risprop && catposstab[base_list[3]][list[3]] == same;
855 break;
856
857 case 5: /* Right general category, left particular category */
858 accepted = lisprop && catposstab[list[3]][base_list[3]] == same;
859 break;
860
861 /* This code is logically tricky. Think hard before fiddling with it.
862 The posspropstab table has four entries per row. Each row relates to
863 one of PCRE's special properties such as ALNUM or SPACE or WORD.
864 Only WORD actually needs all four entries, but using repeats for the
865 others means they can all use the same code below.
866
867 The first two entries in each row are Unicode general categories, and
868 apply always, because all the characters they include are part of the
869 PCRE character set. The third and fourth entries are a general and a
870 particular category, respectively, that include one or more relevant
871 characters. One or the other is used, depending on whether the check
872 is for a general or a particular category. However, in both cases the
873 category contains more characters than the specials that are defined
874 for the property being tested against. Therefore, it cannot be used
875 in a NOTPROP case.
876
877 Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po.
878 Underscore is covered by ucp_P or ucp_Po. */
879
880 case 6: /* Left alphanum vs right general category */
881 case 7: /* Left space vs right general category */
882 case 8: /* Left word vs right general category */
883 p = posspropstab[n-6];
884 accepted = risprop && lisprop ==
885 (list[3] != p[0] &&
886 list[3] != p[1] &&
887 (list[3] != p[2] || !lisprop));
888 break;
889
890 case 9: /* Right alphanum vs left general category */
891 case 10: /* Right space vs left general category */
892 case 11: /* Right word vs left general category */
893 p = posspropstab[n-9];
894 accepted = lisprop && risprop ==
895 (base_list[3] != p[0] &&
896 base_list[3] != p[1] &&
897 (base_list[3] != p[2] || !risprop));
898 break;
899
900 case 12: /* Left alphanum vs right particular category */
901 case 13: /* Left space vs right particular category */
902 case 14: /* Left word vs right particular category */
903 p = posspropstab[n-12];
904 accepted = risprop && lisprop ==
905 (catposstab[p[0]][list[3]] &&
906 catposstab[p[1]][list[3]] &&
907 (list[3] != p[3] || !lisprop));
908 break;
909
910 case 15: /* Right alphanum vs left particular category */
911 case 16: /* Right space vs left particular category */
912 case 17: /* Right word vs left particular category */
913 p = posspropstab[n-15];
914 accepted = lisprop && risprop ==
915 (catposstab[p[0]][base_list[3]] &&
916 catposstab[p[1]][base_list[3]] &&
917 (base_list[3] != p[3] || !risprop));
918 break;
919 }
920 }
921 }
922
923 else
924 #endif /* SUPPORT_UNICODE */
925
926 accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
927 rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
928 autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
929
930 if (!accepted) return FALSE;
931
932 if (list[1] == 0) return TRUE;
933 /* Might be an empty repeat. */
934 continue;
935 }
936
937 /* Control reaches here only if one of the items is a small character list.
938 All characters are checked against the other side. */
939
940 do
941 {
942 chr = *chr_ptr;
943
944 switch(list_ptr[0])
945 {
946 case OP_CHAR:
947 ochr_ptr = list_ptr + 2;
948 do
949 {
950 if (chr == *ochr_ptr) return FALSE;
951 ochr_ptr++;
952 }
953 while(*ochr_ptr != NOTACHAR);
954 break;
955
956 case OP_NOT:
957 ochr_ptr = list_ptr + 2;
958 do
959 {
960 if (chr == *ochr_ptr)
961 break;
962 ochr_ptr++;
963 }
964 while(*ochr_ptr != NOTACHAR);
965 if (*ochr_ptr == NOTACHAR) return FALSE; /* Not found */
966 break;
967
968 /* Note that OP_DIGIT etc. are generated only when PCRE2_UCP is *not*
969 set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
970
971 case OP_DIGIT:
972 if (chr < 256 && (cb->ctypes[chr] & ctype_digit) != 0) return FALSE;
973 break;
974
975 case OP_NOT_DIGIT:
976 if (chr > 255 || (cb->ctypes[chr] & ctype_digit) == 0) return FALSE;
977 break;
978
979 case OP_WHITESPACE:
980 if (chr < 256 && (cb->ctypes[chr] & ctype_space) != 0) return FALSE;
981 break;
982
983 case OP_NOT_WHITESPACE:
984 if (chr > 255 || (cb->ctypes[chr] & ctype_space) == 0) return FALSE;
985 break;
986
987 case OP_WORDCHAR:
988 if (chr < 255 && (cb->ctypes[chr] & ctype_word) != 0) return FALSE;
989 break;
990
991 case OP_NOT_WORDCHAR:
992 if (chr > 255 || (cb->ctypes[chr] & ctype_word) == 0) return FALSE;
993 break;
994
995 case OP_HSPACE:
996 switch(chr)
997 {
998 HSPACE_CASES: return FALSE;
999 default: break;
1000 }
1001 break;
1002
1003 case OP_NOT_HSPACE:
1004 switch(chr)
1005 {
1006 HSPACE_CASES: break;
1007 default: return FALSE;
1008 }
1009 break;
1010
1011 case OP_ANYNL:
1012 case OP_VSPACE:
1013 switch(chr)
1014 {
1015 VSPACE_CASES: return FALSE;
1016 default: break;
1017 }
1018 break;
1019
1020 case OP_NOT_VSPACE:
1021 switch(chr)
1022 {
1023 VSPACE_CASES: break;
1024 default: return FALSE;
1025 }
1026 break;
1027
1028 case OP_DOLL:
1029 case OP_EODN:
1030 switch (chr)
1031 {
1032 case CHAR_CR:
1033 case CHAR_LF:
1034 case CHAR_VT:
1035 case CHAR_FF:
1036 case CHAR_NEL:
1037 #ifndef EBCDIC
1038 case 0x2028:
1039 case 0x2029:
1040 #endif /* Not EBCDIC */
1041 return FALSE;
1042 }
1043 break;
1044
1045 case OP_EOD: /* Can always possessify before \z */
1046 break;
1047
1048 #ifdef SUPPORT_UNICODE
1049 case OP_PROP:
1050 case OP_NOTPROP:
1051 if (!check_char_prop(chr, list_ptr[2], list_ptr[3],
1052 list_ptr[0] == OP_NOTPROP))
1053 return FALSE;
1054 break;
1055 #endif
1056
1057 case OP_NCLASS:
1058 if (chr > 255) return FALSE;
1059 /* Fall through */
1060
1061 case OP_CLASS:
1062 if (chr > 255) break;
1063 class_bitset = (uint8_t *)
1064 ((list_ptr == list ? code : base_end) - list_ptr[2]);
1065 if ((class_bitset[chr >> 3] & (1u << (chr & 7))) != 0) return FALSE;
1066 break;
1067
1068 #ifdef SUPPORT_WIDE_CHARS
1069 case OP_XCLASS:
1070 if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) -
1071 list_ptr[2] + LINK_SIZE, utf)) return FALSE;
1072 break;
1073 #endif
1074
1075 default:
1076 return FALSE;
1077 }
1078
1079 chr_ptr++;
1080 }
1081 while(*chr_ptr != NOTACHAR);
1082
1083 /* At least one character must be matched from this opcode. */
1084
1085 if (list[1] == 0) return TRUE;
1086 }
1087
/* Control never reaches here. There used to be a fail-safe return FALSE; here,
but some compilers complain about an unreachable statement. */
1090 }
1091
1092
1093
1094 /*************************************************
1095 * Scan compiled regex for auto-possession *
1096 *************************************************/
1097
1098 /* Replaces single character iterations with their possessive alternatives
1099 if appropriate. This function modifies the compiled opcode! Hitting a
1100 non-existent opcode may indicate a bug in PCRE2, but it can also be caused if a
1101 bad UTF string was compiled with PCRE2_NO_UTF_CHECK. The rec_limit catches
1102 overly complicated or large patterns. In these cases, the check just stops,
1103 leaving the remainder of the pattern unpossessified.
1104
1105 Arguments:
1106 code points to start of the byte code
1107 cb compile data block
1108
Returns:        0 for success
                -1 if a non-existent opcode is encountered
1111 */
1112
1113 int
PRIV(auto_possessify)1114 PRIV(auto_possessify)(PCRE2_UCHAR *code, const compile_block *cb)
1115 {
1116 PCRE2_UCHAR c;
1117 PCRE2_SPTR end;
1118 PCRE2_UCHAR *repeat_opcode;
1119 uint32_t list[8];
1120 int rec_limit = 1000; /* Was 10,000 but clang+ASAN uses a lot of stack. */
1121 BOOL utf = (cb->external_options & PCRE2_UTF) != 0;
1122 BOOL ucp = (cb->external_options & PCRE2_UCP) != 0;
1123
1124 for (;;)
1125 {
1126 c = *code;
1127
1128 if (c >= OP_TABLE_LENGTH) return -1; /* Something gone wrong */
1129
1130 if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
1131 {
1132 c -= get_repeat_base(c) - OP_STAR;
1133 end = (c <= OP_MINUPTO) ?
1134 get_chr_property_list(code, utf, ucp, cb->fcc, list) : NULL;
1135 list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
1136
1137 if (end != NULL && compare_opcodes(end, utf, ucp, cb, list, end,
1138 &rec_limit))
1139 {
1140 switch(c)
1141 {
1142 case OP_STAR:
1143 *code += OP_POSSTAR - OP_STAR;
1144 break;
1145
1146 case OP_MINSTAR:
1147 *code += OP_POSSTAR - OP_MINSTAR;
1148 break;
1149
1150 case OP_PLUS:
1151 *code += OP_POSPLUS - OP_PLUS;
1152 break;
1153
1154 case OP_MINPLUS:
1155 *code += OP_POSPLUS - OP_MINPLUS;
1156 break;
1157
1158 case OP_QUERY:
1159 *code += OP_POSQUERY - OP_QUERY;
1160 break;
1161
1162 case OP_MINQUERY:
1163 *code += OP_POSQUERY - OP_MINQUERY;
1164 break;
1165
1166 case OP_UPTO:
1167 *code += OP_POSUPTO - OP_UPTO;
1168 break;
1169
1170 case OP_MINUPTO:
1171 *code += OP_POSUPTO - OP_MINUPTO;
1172 break;
1173 }
1174 }
1175 c = *code;
1176 }
1177 else if (c == OP_CLASS || c == OP_NCLASS || c == OP_XCLASS)
1178 {
1179 #ifdef SUPPORT_WIDE_CHARS
1180 if (c == OP_XCLASS)
1181 repeat_opcode = code + GET(code, 1);
1182 else
1183 #endif
1184 repeat_opcode = code + 1 + (32 / sizeof(PCRE2_UCHAR));
1185
1186 c = *repeat_opcode;
1187 if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
1188 {
1189 /* end must not be NULL. */
1190 end = get_chr_property_list(code, utf, ucp, cb->fcc, list);
1191
1192 list[1] = (c & 1) == 0;
1193
1194 if (compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit))
1195 {
1196 switch (c)
1197 {
1198 case OP_CRSTAR:
1199 case OP_CRMINSTAR:
1200 *repeat_opcode = OP_CRPOSSTAR;
1201 break;
1202
1203 case OP_CRPLUS:
1204 case OP_CRMINPLUS:
1205 *repeat_opcode = OP_CRPOSPLUS;
1206 break;
1207
1208 case OP_CRQUERY:
1209 case OP_CRMINQUERY:
1210 *repeat_opcode = OP_CRPOSQUERY;
1211 break;
1212
1213 case OP_CRRANGE:
1214 case OP_CRMINRANGE:
1215 *repeat_opcode = OP_CRPOSRANGE;
1216 break;
1217 }
1218 }
1219 }
1220 c = *code;
1221 }
1222
1223 switch(c)
1224 {
1225 case OP_END:
1226 return 0;
1227
1228 case OP_TYPESTAR:
1229 case OP_TYPEMINSTAR:
1230 case OP_TYPEPLUS:
1231 case OP_TYPEMINPLUS:
1232 case OP_TYPEQUERY:
1233 case OP_TYPEMINQUERY:
1234 case OP_TYPEPOSSTAR:
1235 case OP_TYPEPOSPLUS:
1236 case OP_TYPEPOSQUERY:
1237 if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
1238 break;
1239
1240 case OP_TYPEUPTO:
1241 case OP_TYPEMINUPTO:
1242 case OP_TYPEEXACT:
1243 case OP_TYPEPOSUPTO:
1244 if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
1245 code += 2;
1246 break;
1247
1248 case OP_CALLOUT_STR:
1249 code += GET(code, 1 + 2*LINK_SIZE);
1250 break;
1251
1252 #ifdef SUPPORT_WIDE_CHARS
1253 case OP_XCLASS:
1254 code += GET(code, 1);
1255 break;
1256 #endif
1257
1258 case OP_MARK:
1259 case OP_COMMIT_ARG:
1260 case OP_PRUNE_ARG:
1261 case OP_SKIP_ARG:
1262 case OP_THEN_ARG:
1263 code += code[1];
1264 break;
1265 }
1266
1267 /* Add in the fixed length from the table */
1268
1269 code += PRIV(OP_lengths)[c];
1270
1271 /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
1272 followed by a multi-byte character. The length in the table is a minimum, so
1273 we have to arrange to skip the extra code units. */
1274
1275 #ifdef MAYBE_UTF_MULTI
1276 if (utf) switch(c)
1277 {
1278 case OP_CHAR:
1279 case OP_CHARI:
1280 case OP_NOT:
1281 case OP_NOTI:
1282 case OP_STAR:
1283 case OP_MINSTAR:
1284 case OP_PLUS:
1285 case OP_MINPLUS:
1286 case OP_QUERY:
1287 case OP_MINQUERY:
1288 case OP_UPTO:
1289 case OP_MINUPTO:
1290 case OP_EXACT:
1291 case OP_POSSTAR:
1292 case OP_POSPLUS:
1293 case OP_POSQUERY:
1294 case OP_POSUPTO:
1295 case OP_STARI:
1296 case OP_MINSTARI:
1297 case OP_PLUSI:
1298 case OP_MINPLUSI:
1299 case OP_QUERYI:
1300 case OP_MINQUERYI:
1301 case OP_UPTOI:
1302 case OP_MINUPTOI:
1303 case OP_EXACTI:
1304 case OP_POSSTARI:
1305 case OP_POSPLUSI:
1306 case OP_POSQUERYI:
1307 case OP_POSUPTOI:
1308 case OP_NOTSTAR:
1309 case OP_NOTMINSTAR:
1310 case OP_NOTPLUS:
1311 case OP_NOTMINPLUS:
1312 case OP_NOTQUERY:
1313 case OP_NOTMINQUERY:
1314 case OP_NOTUPTO:
1315 case OP_NOTMINUPTO:
1316 case OP_NOTEXACT:
1317 case OP_NOTPOSSTAR:
1318 case OP_NOTPOSPLUS:
1319 case OP_NOTPOSQUERY:
1320 case OP_NOTPOSUPTO:
1321 case OP_NOTSTARI:
1322 case OP_NOTMINSTARI:
1323 case OP_NOTPLUSI:
1324 case OP_NOTMINPLUSI:
1325 case OP_NOTQUERYI:
1326 case OP_NOTMINQUERYI:
1327 case OP_NOTUPTOI:
1328 case OP_NOTMINUPTOI:
1329 case OP_NOTEXACTI:
1330 case OP_NOTPOSSTARI:
1331 case OP_NOTPOSPLUSI:
1332 case OP_NOTPOSQUERYI:
1333 case OP_NOTPOSUPTOI:
1334 if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
1335 break;
1336 }
1337 #else
1338 (void)(utf); /* Keep compiler happy by referencing function argument */
1339 #endif /* SUPPORT_WIDE_CHARS */
1340 }
1341 }
1342
1343 /* End of pcre2_auto_possess.c */
1344