1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Original API code Copyright (c) 1997-2012 University of Cambridge
10 New API code Copyright (c) 2016-2018 University of Cambridge
11
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40
41 /* This module contains functions that scan a compiled pattern and change
42 repeats into possessive repeats where possible. */
43
44
45 #ifdef HAVE_CONFIG_H
46 #include "config.h"
47 #endif
48
49
50 #include "pcre2_internal.h"
51
52
53 /*************************************************
54 * Tables for auto-possessification *
55 *************************************************/
56
57 /* This table is used to check whether auto-possessification is possible
58 between adjacent character-type opcodes. The left-hand (repeated) opcode is
59 used to select the row, and the right-hand opcode is use to select the column.
60 A value of 1 means that auto-possessification is OK. For example, the second
61 value in the first row means that \D+\d can be turned into \D++\d.
62
63 The Unicode property types (\P and \p) have to be present to fill out the table
64 because of what their opcode values are, but the table values should always be
65 zero because property types are handled separately in the code. The last four
66 columns apply to items that cannot be repeated, so there is no need to have
67 rows for them. Note that OP_DIGIT etc. are generated only when PCRE_UCP is
68 *not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
69
70 #define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1)
71 #define APTCOLS (LAST_AUTOTAB_RIGHT_OP - FIRST_AUTOTAB_OP + 1)
72
73 static const uint8_t autoposstab[APTROWS][APTCOLS] = {
74 /* \D \d \S \s \W \w . .+ \C \P \p \R \H \h \V \v \X \Z \z $ $M */
75 { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \D */
76 { 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \d */
77 { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \S */
78 { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \s */
79 { 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \W */
80 { 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \w */
81 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* . */
82 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* .+ */
83 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \C */
84 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* \P */
85 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* \p */
86 { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 }, /* \R */
87 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 }, /* \H */
88 { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0 }, /* \h */
89 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0 }, /* \V */
90 { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0 }, /* \v */
91 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 } /* \X */
92 };
93
94 #ifdef SUPPORT_UNICODE
95 /* This table is used to check whether auto-possessification is possible
96 between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP). The
97 left-hand (repeated) opcode is used to select the row, and the right-hand
98 opcode is used to select the column. The values are as follows:
99
100 0 Always return FALSE (never auto-possessify)
101 1 Character groups are distinct (possessify if both are OP_PROP)
102 2 Check character categories in the same group (general or particular)
103 3 TRUE if the two opcodes are not the same (PROP vs NOTPROP)
104
105 4 Check left general category vs right particular category
106 5 Check right general category vs left particular category
107
108 6 Left alphanum vs right general category
109 7 Left space vs right general category
110 8 Left word vs right general category
111
112 9 Right alphanum vs left general category
113 10 Right space vs left general category
114 11 Right word vs left general category
115
116 12 Left alphanum vs right particular category
117 13 Left space vs right particular category
118 14 Left word vs right particular category
119
120 15 Right alphanum vs left particular category
121 16 Right space vs left particular category
122 17 Right word vs left particular category
123 */
124
125 static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = {
126 /* ANY LAMP GC PC SC ALNUM SPACE PXSPACE WORD CLIST UCNC */
127 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_ANY */
128 { 0, 3, 0, 0, 0, 3, 1, 1, 0, 0, 0 }, /* PT_LAMP */
129 { 0, 0, 2, 4, 0, 9, 10, 10, 11, 0, 0 }, /* PT_GC */
130 { 0, 0, 5, 2, 0, 15, 16, 16, 17, 0, 0 }, /* PT_PC */
131 { 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0 }, /* PT_SC */
132 { 0, 3, 6, 12, 0, 3, 1, 1, 0, 0, 0 }, /* PT_ALNUM */
133 { 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0 }, /* PT_SPACE */
134 { 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0 }, /* PT_PXSPACE */
135 { 0, 0, 8, 14, 0, 0, 1, 1, 3, 0, 0 }, /* PT_WORD */
136 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */
137 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 } /* PT_UCNC */
138 };
139
/* Lookup used when one of two adjacent Unicode property opcodes (OP_PROP and
OP_NOTPROP) specifies a general category and the other a particular category.
The general category selects the row and the particular category selects the
column. An entry is 1 when the particular category is not part of the general
category, and 0 when it is. */

static const uint8_t catposstab[7][30] = {
/*         Cc Cf Cn Co Cs Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc Pd Pe Pf Pi Po Ps Sc Sk Sm So Zl Zp Zs */
/* C */  { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
/* L */  { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
/* M */  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
/* N */  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
/* P */  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },
/* S */  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1 },
/* Z */  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 }
};
157
158 /* This table is used when checking ALNUM, (PX)SPACE, SPACE, and WORD against
159 a general or particular category. The properties in each row are those
160 that apply to the character set in question. Duplication means that a little
161 unnecessary work is done when checking, but this keeps things much simpler
162 because they can all use the same code. For more details see the comment where
163 this table is used.
164
165 Note: SPACE and PXSPACE used to be different because Perl excluded VT from
166 "space", but from Perl 5.18 it's included, so both categories are treated the
167 same here. */
168
169 static const uint8_t posspropstab[3][4] = {
170 { ucp_L, ucp_N, ucp_N, ucp_Nl }, /* ALNUM, 3rd and 4th values redundant */
171 { ucp_Z, ucp_Z, ucp_C, ucp_Cc }, /* SPACE and PXSPACE, 2nd value redundant */
172 { ucp_L, ucp_N, ucp_P, ucp_Po } /* WORD */
173 };
174 #endif /* SUPPORT_UNICODE */
175
176
177
178 #ifdef SUPPORT_UNICODE
179 /*************************************************
180 * Check a character and a property *
181 *************************************************/
182
183 /* This function is called by compare_opcodes() when a property item is
184 adjacent to a fixed character.
185
186 Arguments:
187 c the character
188 ptype the property type
189 pdata the data for the type
190 negated TRUE if it's a negated property (\P or \p{^)
191
192 Returns: TRUE if auto-possessifying is OK
193 */
194
195 static BOOL
check_char_prop(uint32_t c,unsigned int ptype,unsigned int pdata,BOOL negated)196 check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata,
197 BOOL negated)
198 {
199 const uint32_t *p;
200 const ucd_record *prop = GET_UCD(c);
201
202 switch(ptype)
203 {
204 case PT_LAMP:
205 return (prop->chartype == ucp_Lu ||
206 prop->chartype == ucp_Ll ||
207 prop->chartype == ucp_Lt) == negated;
208
209 case PT_GC:
210 return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
211
212 case PT_PC:
213 return (pdata == prop->chartype) == negated;
214
215 case PT_SC:
216 return (pdata == prop->script) == negated;
217
218 /* These are specials */
219
220 case PT_ALNUM:
221 return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
222 PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
223
224 /* Perl space used to exclude VT, but from Perl 5.18 it is included, which
225 means that Perl space and POSIX space are now identical. PCRE was changed
226 at release 8.34. */
227
228 case PT_SPACE: /* Perl space */
229 case PT_PXSPACE: /* POSIX space */
230 switch(c)
231 {
232 HSPACE_CASES:
233 VSPACE_CASES:
234 return negated;
235
236 default:
237 return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;
238 }
239 break; /* Control never reaches here */
240
241 case PT_WORD:
242 return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
243 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
244 c == CHAR_UNDERSCORE) == negated;
245
246 case PT_CLIST:
247 p = PRIV(ucd_caseless_sets) + prop->caseset;
248 for (;;)
249 {
250 if (c < *p) return !negated;
251 if (c == *p++) return negated;
252 }
253 break; /* Control never reaches here */
254 }
255
256 return FALSE;
257 }
258 #endif /* SUPPORT_UNICODE */
259
260
261
262 /*************************************************
263 * Base opcode of repeated opcodes *
264 *************************************************/
265
266 /* Returns the base opcode for repeated single character type opcodes. If the
267 opcode is not a repeated character type, it returns with the original value.
268
269 Arguments: c opcode
270 Returns: base opcode for the type
271 */
272
273 static PCRE2_UCHAR
get_repeat_base(PCRE2_UCHAR c)274 get_repeat_base(PCRE2_UCHAR c)
275 {
276 return (c > OP_TYPEPOSUPTO)? c :
277 (c >= OP_TYPESTAR)? OP_TYPESTAR :
278 (c >= OP_NOTSTARI)? OP_NOTSTARI :
279 (c >= OP_NOTSTAR)? OP_NOTSTAR :
280 (c >= OP_STARI)? OP_STARI :
281 OP_STAR;
282 }
283
284
285 /*************************************************
286 * Fill the character property list *
287 *************************************************/
288
289 /* Checks whether the code points to an opcode that can take part in auto-
290 possessification, and if so, fills a list with its properties.
291
292 Arguments:
293 code points to start of expression
294 utf TRUE if in UTF mode
295 fcc points to the case-flipping table
296 list points to output list
297 list[0] will be filled with the opcode
298 list[1] will be non-zero if this opcode
299 can match an empty character string
300 list[2..7] depends on the opcode
301
302 Returns: points to the start of the next opcode if *code is accepted
303 NULL if *code is not accepted
304 */
305
306 static PCRE2_SPTR
get_chr_property_list(PCRE2_SPTR code,BOOL utf,const uint8_t * fcc,uint32_t * list)307 get_chr_property_list(PCRE2_SPTR code, BOOL utf, const uint8_t *fcc,
308 uint32_t *list)
309 {
310 PCRE2_UCHAR c = *code;
311 PCRE2_UCHAR base;
312 PCRE2_SPTR end;
313 uint32_t chr;
314
315 #ifdef SUPPORT_UNICODE
316 uint32_t *clist_dest;
317 const uint32_t *clist_src;
318 #else
319 (void)utf; /* Suppress "unused parameter" compiler warning */
320 #endif
321
322 list[0] = c;
323 list[1] = FALSE;
324 code++;
325
326 if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
327 {
328 base = get_repeat_base(c);
329 c -= (base - OP_STAR);
330
331 if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
332 code += IMM2_SIZE;
333
334 list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
335 c != OP_POSPLUS);
336
337 switch(base)
338 {
339 case OP_STAR:
340 list[0] = OP_CHAR;
341 break;
342
343 case OP_STARI:
344 list[0] = OP_CHARI;
345 break;
346
347 case OP_NOTSTAR:
348 list[0] = OP_NOT;
349 break;
350
351 case OP_NOTSTARI:
352 list[0] = OP_NOTI;
353 break;
354
355 case OP_TYPESTAR:
356 list[0] = *code;
357 code++;
358 break;
359 }
360 c = list[0];
361 }
362
363 switch(c)
364 {
365 case OP_NOT_DIGIT:
366 case OP_DIGIT:
367 case OP_NOT_WHITESPACE:
368 case OP_WHITESPACE:
369 case OP_NOT_WORDCHAR:
370 case OP_WORDCHAR:
371 case OP_ANY:
372 case OP_ALLANY:
373 case OP_ANYNL:
374 case OP_NOT_HSPACE:
375 case OP_HSPACE:
376 case OP_NOT_VSPACE:
377 case OP_VSPACE:
378 case OP_EXTUNI:
379 case OP_EODN:
380 case OP_EOD:
381 case OP_DOLL:
382 case OP_DOLLM:
383 return code;
384
385 case OP_CHAR:
386 case OP_NOT:
387 GETCHARINCTEST(chr, code);
388 list[2] = chr;
389 list[3] = NOTACHAR;
390 return code;
391
392 case OP_CHARI:
393 case OP_NOTI:
394 list[0] = (c == OP_CHARI) ? OP_CHAR : OP_NOT;
395 GETCHARINCTEST(chr, code);
396 list[2] = chr;
397
398 #ifdef SUPPORT_UNICODE
399 if (chr < 128 || (chr < 256 && !utf))
400 list[3] = fcc[chr];
401 else
402 list[3] = UCD_OTHERCASE(chr);
403 #elif defined SUPPORT_WIDE_CHARS
404 list[3] = (chr < 256) ? fcc[chr] : chr;
405 #else
406 list[3] = fcc[chr];
407 #endif
408
409 /* The othercase might be the same value. */
410
411 if (chr == list[3])
412 list[3] = NOTACHAR;
413 else
414 list[4] = NOTACHAR;
415 return code;
416
417 #ifdef SUPPORT_UNICODE
418 case OP_PROP:
419 case OP_NOTPROP:
420 if (code[0] != PT_CLIST)
421 {
422 list[2] = code[0];
423 list[3] = code[1];
424 return code + 2;
425 }
426
427 /* Convert only if we have enough space. */
428
429 clist_src = PRIV(ucd_caseless_sets) + code[1];
430 clist_dest = list + 2;
431 code += 2;
432
433 do {
434 if (clist_dest >= list + 8)
435 {
436 /* Early return if there is not enough space. This should never
437 happen, since all clists are shorter than 5 character now. */
438 list[2] = code[0];
439 list[3] = code[1];
440 return code;
441 }
442 *clist_dest++ = *clist_src;
443 }
444 while(*clist_src++ != NOTACHAR);
445
446 /* All characters are stored. The terminating NOTACHAR is copied from the
447 clist itself. */
448
449 list[0] = (c == OP_PROP) ? OP_CHAR : OP_NOT;
450 return code;
451 #endif
452
453 case OP_NCLASS:
454 case OP_CLASS:
455 #ifdef SUPPORT_WIDE_CHARS
456 case OP_XCLASS:
457 if (c == OP_XCLASS)
458 end = code + GET(code, 0) - 1;
459 else
460 #endif
461 end = code + 32 / sizeof(PCRE2_UCHAR);
462
463 switch(*end)
464 {
465 case OP_CRSTAR:
466 case OP_CRMINSTAR:
467 case OP_CRQUERY:
468 case OP_CRMINQUERY:
469 case OP_CRPOSSTAR:
470 case OP_CRPOSQUERY:
471 list[1] = TRUE;
472 end++;
473 break;
474
475 case OP_CRPLUS:
476 case OP_CRMINPLUS:
477 case OP_CRPOSPLUS:
478 end++;
479 break;
480
481 case OP_CRRANGE:
482 case OP_CRMINRANGE:
483 case OP_CRPOSRANGE:
484 list[1] = (GET2(end, 1) == 0);
485 end += 1 + 2 * IMM2_SIZE;
486 break;
487 }
488 list[2] = (uint32_t)(end - code);
489 return end;
490 }
491 return NULL; /* Opcode not accepted */
492 }
493
494
495
496 /*************************************************
497 * Scan further character sets for match *
498 *************************************************/
499
500 /* Checks whether the base and the current opcode have a common character, in
501 which case the base cannot be possessified.
502
503 Arguments:
504 code points to the byte code
505 utf TRUE in UTF mode
506 cb compile data block
507 base_list the data list of the base opcode
508 base_end the end of the base opcode
509 rec_limit points to recursion depth counter
510
511 Returns: TRUE if the auto-possessification is possible
512 */
513
514 static BOOL
compare_opcodes(PCRE2_SPTR code,BOOL utf,const compile_block * cb,const uint32_t * base_list,PCRE2_SPTR base_end,int * rec_limit)515 compare_opcodes(PCRE2_SPTR code, BOOL utf, const compile_block *cb,
516 const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit)
517 {
518 PCRE2_UCHAR c;
519 uint32_t list[8];
520 const uint32_t *chr_ptr;
521 const uint32_t *ochr_ptr;
522 const uint32_t *list_ptr;
523 PCRE2_SPTR next_code;
524 #ifdef SUPPORT_WIDE_CHARS
525 PCRE2_SPTR xclass_flags;
526 #endif
527 const uint8_t *class_bitset;
528 const uint8_t *set1, *set2, *set_end;
529 uint32_t chr;
530 BOOL accepted, invert_bits;
531 BOOL entered_a_group = FALSE;
532
533 if (--(*rec_limit) <= 0) return FALSE; /* Recursion has gone too deep */
534
535 /* Note: the base_list[1] contains whether the current opcode has a greedy
536 (represented by a non-zero value) quantifier. This is a different from
537 other character type lists, which store here that the character iterator
538 matches to an empty string (also represented by a non-zero value). */
539
540 for(;;)
541 {
542 /* All operations move the code pointer forward.
543 Therefore infinite recursions are not possible. */
544
545 c = *code;
546
547 /* Skip over callouts */
548
549 if (c == OP_CALLOUT)
550 {
551 code += PRIV(OP_lengths)[c];
552 continue;
553 }
554
555 if (c == OP_CALLOUT_STR)
556 {
557 code += GET(code, 1 + 2*LINK_SIZE);
558 continue;
559 }
560
561 /* At the end of a branch, skip to the end of the group. */
562
563 if (c == OP_ALT)
564 {
565 do code += GET(code, 1); while (*code == OP_ALT);
566 c = *code;
567 }
568
569 /* Inspect the next opcode. */
570
571 switch(c)
572 {
573 /* We can always possessify a greedy iterator at the end of the pattern,
574 which is reached after skipping over the final OP_KET. A non-greedy
575 iterator must never be possessified. */
576
577 case OP_END:
578 return base_list[1] != 0;
579
580 /* When an iterator is at the end of certain kinds of group we can inspect
581 what follows the group by skipping over the closing ket. Note that this
582 does not apply to OP_KETRMAX or OP_KETRMIN because what follows any given
583 iteration is variable (could be another iteration or could be the next
584 item). As these two opcodes are not listed in the next switch, they will
585 end up as the next code to inspect, and return FALSE by virtue of being
586 unsupported. */
587
588 case OP_KET:
589 case OP_KETRPOS:
590 /* The non-greedy case cannot be converted to a possessive form. */
591
592 if (base_list[1] == 0) return FALSE;
593
594 /* If the bracket is capturing it might be referenced by an OP_RECURSE
595 so its last iterator can never be possessified if the pattern contains
596 recursions. (This could be improved by keeping a list of group numbers that
597 are called by recursion.) */
598
599 switch(*(code - GET(code, 1)))
600 {
601 case OP_CBRA:
602 case OP_SCBRA:
603 case OP_CBRAPOS:
604 case OP_SCBRAPOS:
605 if (cb->had_recurse) return FALSE;
606 break;
607
608 /* Atomic sub-patterns and assertions can always auto-possessify their
609 last iterator. However, if the group was entered as a result of checking
610 a previous iterator, this is not possible. */
611
612 case OP_ASSERT:
613 case OP_ASSERT_NOT:
614 case OP_ASSERTBACK:
615 case OP_ASSERTBACK_NOT:
616 case OP_ONCE:
617
618 return !entered_a_group;
619 }
620
621 /* Skip over the bracket and inspect what comes next. */
622
623 code += PRIV(OP_lengths)[c];
624 continue;
625
626 /* Handle cases where the next item is a group. */
627
628 case OP_ONCE:
629 case OP_BRA:
630 case OP_CBRA:
631 next_code = code + GET(code, 1);
632 code += PRIV(OP_lengths)[c];
633
634 /* Check each branch. We have to recurse a level for all but the last
635 branch. */
636
637 while (*next_code == OP_ALT)
638 {
639 if (!compare_opcodes(code, utf, cb, base_list, base_end, rec_limit))
640 return FALSE;
641 code = next_code + 1 + LINK_SIZE;
642 next_code += GET(next_code, 1);
643 }
644
645 entered_a_group = TRUE;
646 continue;
647
648 case OP_BRAZERO:
649 case OP_BRAMINZERO:
650
651 next_code = code + 1;
652 if (*next_code != OP_BRA && *next_code != OP_CBRA &&
653 *next_code != OP_ONCE) return FALSE;
654
655 do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
656
657 /* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
658
659 next_code += 1 + LINK_SIZE;
660 if (!compare_opcodes(next_code, utf, cb, base_list, base_end, rec_limit))
661 return FALSE;
662
663 code += PRIV(OP_lengths)[c];
664 continue;
665
666 /* The next opcode does not need special handling; fall through and use it
667 to see if the base can be possessified. */
668
669 default:
670 break;
671 }
672
673 /* We now have the next appropriate opcode to compare with the base. Check
674 for a supported opcode, and load its properties. */
675
676 code = get_chr_property_list(code, utf, cb->fcc, list);
677 if (code == NULL) return FALSE; /* Unsupported */
678
679 /* If either opcode is a small character list, set pointers for comparing
680 characters from that list with another list, or with a property. */
681
682 if (base_list[0] == OP_CHAR)
683 {
684 chr_ptr = base_list + 2;
685 list_ptr = list;
686 }
687 else if (list[0] == OP_CHAR)
688 {
689 chr_ptr = list + 2;
690 list_ptr = base_list;
691 }
692
693 /* Character bitsets can also be compared to certain opcodes. */
694
695 else if (base_list[0] == OP_CLASS || list[0] == OP_CLASS
696 #if PCRE2_CODE_UNIT_WIDTH == 8
697 /* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. */
698 || (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS))
699 #endif
700 )
701 {
702 #if PCRE2_CODE_UNIT_WIDTH == 8
703 if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS))
704 #else
705 if (base_list[0] == OP_CLASS)
706 #endif
707 {
708 set1 = (uint8_t *)(base_end - base_list[2]);
709 list_ptr = list;
710 }
711 else
712 {
713 set1 = (uint8_t *)(code - list[2]);
714 list_ptr = base_list;
715 }
716
717 invert_bits = FALSE;
718 switch(list_ptr[0])
719 {
720 case OP_CLASS:
721 case OP_NCLASS:
722 set2 = (uint8_t *)
723 ((list_ptr == list ? code : base_end) - list_ptr[2]);
724 break;
725
726 #ifdef SUPPORT_WIDE_CHARS
727 case OP_XCLASS:
728 xclass_flags = (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE;
729 if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE;
730 if ((*xclass_flags & XCL_MAP) == 0)
731 {
732 /* No bits are set for characters < 256. */
733 if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0;
734 /* Might be an empty repeat. */
735 continue;
736 }
737 set2 = (uint8_t *)(xclass_flags + 1);
738 break;
739 #endif
740
741 case OP_NOT_DIGIT:
742 invert_bits = TRUE;
743 /* Fall through */
744 case OP_DIGIT:
745 set2 = (uint8_t *)(cb->cbits + cbit_digit);
746 break;
747
748 case OP_NOT_WHITESPACE:
749 invert_bits = TRUE;
750 /* Fall through */
751 case OP_WHITESPACE:
752 set2 = (uint8_t *)(cb->cbits + cbit_space);
753 break;
754
755 case OP_NOT_WORDCHAR:
756 invert_bits = TRUE;
757 /* Fall through */
758 case OP_WORDCHAR:
759 set2 = (uint8_t *)(cb->cbits + cbit_word);
760 break;
761
762 default:
763 return FALSE;
764 }
765
766 /* Because the bit sets are unaligned bytes, we need to perform byte
767 comparison here. */
768
769 set_end = set1 + 32;
770 if (invert_bits)
771 {
772 do
773 {
774 if ((*set1++ & ~(*set2++)) != 0) return FALSE;
775 }
776 while (set1 < set_end);
777 }
778 else
779 {
780 do
781 {
782 if ((*set1++ & *set2++) != 0) return FALSE;
783 }
784 while (set1 < set_end);
785 }
786
787 if (list[1] == 0) return TRUE;
788 /* Might be an empty repeat. */
789 continue;
790 }
791
792 /* Some property combinations also acceptable. Unicode property opcodes are
793 processed specially; the rest can be handled with a lookup table. */
794
795 else
796 {
797 uint32_t leftop, rightop;
798
799 leftop = base_list[0];
800 rightop = list[0];
801
802 #ifdef SUPPORT_UNICODE
803 accepted = FALSE; /* Always set in non-unicode case. */
804 if (leftop == OP_PROP || leftop == OP_NOTPROP)
805 {
806 if (rightop == OP_EOD)
807 accepted = TRUE;
808 else if (rightop == OP_PROP || rightop == OP_NOTPROP)
809 {
810 int n;
811 const uint8_t *p;
812 BOOL same = leftop == rightop;
813 BOOL lisprop = leftop == OP_PROP;
814 BOOL risprop = rightop == OP_PROP;
815 BOOL bothprop = lisprop && risprop;
816
817 /* There's a table that specifies how each combination is to be
818 processed:
819 0 Always return FALSE (never auto-possessify)
820 1 Character groups are distinct (possessify if both are OP_PROP)
821 2 Check character categories in the same group (general or particular)
822 3 Return TRUE if the two opcodes are not the same
823 ... see comments below
824 */
825
826 n = propposstab[base_list[2]][list[2]];
827 switch(n)
828 {
829 case 0: break;
830 case 1: accepted = bothprop; break;
831 case 2: accepted = (base_list[3] == list[3]) != same; break;
832 case 3: accepted = !same; break;
833
834 case 4: /* Left general category, right particular category */
835 accepted = risprop && catposstab[base_list[3]][list[3]] == same;
836 break;
837
838 case 5: /* Right general category, left particular category */
839 accepted = lisprop && catposstab[list[3]][base_list[3]] == same;
840 break;
841
842 /* This code is logically tricky. Think hard before fiddling with it.
843 The posspropstab table has four entries per row. Each row relates to
844 one of PCRE's special properties such as ALNUM or SPACE or WORD.
845 Only WORD actually needs all four entries, but using repeats for the
846 others means they can all use the same code below.
847
848 The first two entries in each row are Unicode general categories, and
849 apply always, because all the characters they include are part of the
850 PCRE character set. The third and fourth entries are a general and a
851 particular category, respectively, that include one or more relevant
852 characters. One or the other is used, depending on whether the check
853 is for a general or a particular category. However, in both cases the
854 category contains more characters than the specials that are defined
855 for the property being tested against. Therefore, it cannot be used
856 in a NOTPROP case.
857
858 Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po.
859 Underscore is covered by ucp_P or ucp_Po. */
860
861 case 6: /* Left alphanum vs right general category */
862 case 7: /* Left space vs right general category */
863 case 8: /* Left word vs right general category */
864 p = posspropstab[n-6];
865 accepted = risprop && lisprop ==
866 (list[3] != p[0] &&
867 list[3] != p[1] &&
868 (list[3] != p[2] || !lisprop));
869 break;
870
871 case 9: /* Right alphanum vs left general category */
872 case 10: /* Right space vs left general category */
873 case 11: /* Right word vs left general category */
874 p = posspropstab[n-9];
875 accepted = lisprop && risprop ==
876 (base_list[3] != p[0] &&
877 base_list[3] != p[1] &&
878 (base_list[3] != p[2] || !risprop));
879 break;
880
881 case 12: /* Left alphanum vs right particular category */
882 case 13: /* Left space vs right particular category */
883 case 14: /* Left word vs right particular category */
884 p = posspropstab[n-12];
885 accepted = risprop && lisprop ==
886 (catposstab[p[0]][list[3]] &&
887 catposstab[p[1]][list[3]] &&
888 (list[3] != p[3] || !lisprop));
889 break;
890
891 case 15: /* Right alphanum vs left particular category */
892 case 16: /* Right space vs left particular category */
893 case 17: /* Right word vs left particular category */
894 p = posspropstab[n-15];
895 accepted = lisprop && risprop ==
896 (catposstab[p[0]][base_list[3]] &&
897 catposstab[p[1]][base_list[3]] &&
898 (base_list[3] != p[3] || !risprop));
899 break;
900 }
901 }
902 }
903
904 else
905 #endif /* SUPPORT_UNICODE */
906
907 accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
908 rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
909 autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
910
911 if (!accepted) return FALSE;
912
913 if (list[1] == 0) return TRUE;
914 /* Might be an empty repeat. */
915 continue;
916 }
917
918 /* Control reaches here only if one of the items is a small character list.
919 All characters are checked against the other side. */
920
921 do
922 {
923 chr = *chr_ptr;
924
925 switch(list_ptr[0])
926 {
927 case OP_CHAR:
928 ochr_ptr = list_ptr + 2;
929 do
930 {
931 if (chr == *ochr_ptr) return FALSE;
932 ochr_ptr++;
933 }
934 while(*ochr_ptr != NOTACHAR);
935 break;
936
937 case OP_NOT:
938 ochr_ptr = list_ptr + 2;
939 do
940 {
941 if (chr == *ochr_ptr)
942 break;
943 ochr_ptr++;
944 }
945 while(*ochr_ptr != NOTACHAR);
946 if (*ochr_ptr == NOTACHAR) return FALSE; /* Not found */
947 break;
948
949 /* Note that OP_DIGIT etc. are generated only when PCRE2_UCP is *not*
950 set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
951
952 case OP_DIGIT:
953 if (chr < 256 && (cb->ctypes[chr] & ctype_digit) != 0) return FALSE;
954 break;
955
956 case OP_NOT_DIGIT:
957 if (chr > 255 || (cb->ctypes[chr] & ctype_digit) == 0) return FALSE;
958 break;
959
960 case OP_WHITESPACE:
961 if (chr < 256 && (cb->ctypes[chr] & ctype_space) != 0) return FALSE;
962 break;
963
964 case OP_NOT_WHITESPACE:
965 if (chr > 255 || (cb->ctypes[chr] & ctype_space) == 0) return FALSE;
966 break;
967
968 case OP_WORDCHAR:
969 if (chr < 255 && (cb->ctypes[chr] & ctype_word) != 0) return FALSE;
970 break;
971
972 case OP_NOT_WORDCHAR:
973 if (chr > 255 || (cb->ctypes[chr] & ctype_word) == 0) return FALSE;
974 break;
975
976 case OP_HSPACE:
977 switch(chr)
978 {
979 HSPACE_CASES: return FALSE;
980 default: break;
981 }
982 break;
983
984 case OP_NOT_HSPACE:
985 switch(chr)
986 {
987 HSPACE_CASES: break;
988 default: return FALSE;
989 }
990 break;
991
992 case OP_ANYNL:
993 case OP_VSPACE:
994 switch(chr)
995 {
996 VSPACE_CASES: return FALSE;
997 default: break;
998 }
999 break;
1000
1001 case OP_NOT_VSPACE:
1002 switch(chr)
1003 {
1004 VSPACE_CASES: break;
1005 default: return FALSE;
1006 }
1007 break;
1008
1009 case OP_DOLL:
1010 case OP_EODN:
1011 switch (chr)
1012 {
1013 case CHAR_CR:
1014 case CHAR_LF:
1015 case CHAR_VT:
1016 case CHAR_FF:
1017 case CHAR_NEL:
1018 #ifndef EBCDIC
1019 case 0x2028:
1020 case 0x2029:
1021 #endif /* Not EBCDIC */
1022 return FALSE;
1023 }
1024 break;
1025
1026 case OP_EOD: /* Can always possessify before \z */
1027 break;
1028
1029 #ifdef SUPPORT_UNICODE
1030 case OP_PROP:
1031 case OP_NOTPROP:
1032 if (!check_char_prop(chr, list_ptr[2], list_ptr[3],
1033 list_ptr[0] == OP_NOTPROP))
1034 return FALSE;
1035 break;
1036 #endif
1037
1038 case OP_NCLASS:
1039 if (chr > 255) return FALSE;
1040 /* Fall through */
1041
1042 case OP_CLASS:
1043 if (chr > 255) break;
1044 class_bitset = (uint8_t *)
1045 ((list_ptr == list ? code : base_end) - list_ptr[2]);
1046 if ((class_bitset[chr >> 3] & (1 << (chr & 7))) != 0) return FALSE;
1047 break;
1048
1049 #ifdef SUPPORT_WIDE_CHARS
1050 case OP_XCLASS:
1051 if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) -
1052 list_ptr[2] + LINK_SIZE, utf)) return FALSE;
1053 break;
1054 #endif
1055
1056 default:
1057 return FALSE;
1058 }
1059
1060 chr_ptr++;
1061 }
1062 while(*chr_ptr != NOTACHAR);
1063
1064 /* At least one character must be matched from this opcode. */
1065
1066 if (list[1] == 0) return TRUE;
1067 }
1068
/* Control never reaches here. There used to be a fail-safe return FALSE; here,
but some compilers complain about an unreachable statement. */
1071 }
1072
1073
1074
1075 /*************************************************
1076 * Scan compiled regex for auto-possession *
1077 *************************************************/
1078
1079 /* Replaces single character iterations with their possessive alternatives
1080 if appropriate. This function modifies the compiled opcode! Hitting a
1081 non-existent opcode may indicate a bug in PCRE2, but it can also be caused if a
1082 bad UTF string was compiled with PCRE2_NO_UTF_CHECK. The rec_limit catches
1083 overly complicated or large patterns. In these cases, the check just stops,
1084 leaving the remainder of the pattern unpossessified.
1085
1086 Arguments:
1087 code points to start of the byte code
1088 utf TRUE in UTF mode
1089 cb compile data block
1090
1091 Returns: 0 for success
1092 -1 if a non-existant opcode is encountered
1093 */
1094
1095 int
PRIV(auto_possessify)1096 PRIV(auto_possessify)(PCRE2_UCHAR *code, BOOL utf, const compile_block *cb)
1097 {
1098 PCRE2_UCHAR c;
1099 PCRE2_SPTR end;
1100 PCRE2_UCHAR *repeat_opcode;
1101 uint32_t list[8];
1102 int rec_limit = 1000; /* Was 10,000 but clang+ASAN uses a lot of stack. */
1103
1104 for (;;)
1105 {
1106 c = *code;
1107
1108 if (c >= OP_TABLE_LENGTH) return -1; /* Something gone wrong */
1109
1110 if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
1111 {
1112 c -= get_repeat_base(c) - OP_STAR;
1113 end = (c <= OP_MINUPTO) ?
1114 get_chr_property_list(code, utf, cb->fcc, list) : NULL;
1115 list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
1116
1117 if (end != NULL && compare_opcodes(end, utf, cb, list, end, &rec_limit))
1118 {
1119 switch(c)
1120 {
1121 case OP_STAR:
1122 *code += OP_POSSTAR - OP_STAR;
1123 break;
1124
1125 case OP_MINSTAR:
1126 *code += OP_POSSTAR - OP_MINSTAR;
1127 break;
1128
1129 case OP_PLUS:
1130 *code += OP_POSPLUS - OP_PLUS;
1131 break;
1132
1133 case OP_MINPLUS:
1134 *code += OP_POSPLUS - OP_MINPLUS;
1135 break;
1136
1137 case OP_QUERY:
1138 *code += OP_POSQUERY - OP_QUERY;
1139 break;
1140
1141 case OP_MINQUERY:
1142 *code += OP_POSQUERY - OP_MINQUERY;
1143 break;
1144
1145 case OP_UPTO:
1146 *code += OP_POSUPTO - OP_UPTO;
1147 break;
1148
1149 case OP_MINUPTO:
1150 *code += OP_POSUPTO - OP_MINUPTO;
1151 break;
1152 }
1153 }
1154 c = *code;
1155 }
1156 else if (c == OP_CLASS || c == OP_NCLASS || c == OP_XCLASS)
1157 {
1158 #ifdef SUPPORT_WIDE_CHARS
1159 if (c == OP_XCLASS)
1160 repeat_opcode = code + GET(code, 1);
1161 else
1162 #endif
1163 repeat_opcode = code + 1 + (32 / sizeof(PCRE2_UCHAR));
1164
1165 c = *repeat_opcode;
1166 if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
1167 {
1168 /* end must not be NULL. */
1169 end = get_chr_property_list(code, utf, cb->fcc, list);
1170
1171 list[1] = (c & 1) == 0;
1172
1173 if (compare_opcodes(end, utf, cb, list, end, &rec_limit))
1174 {
1175 switch (c)
1176 {
1177 case OP_CRSTAR:
1178 case OP_CRMINSTAR:
1179 *repeat_opcode = OP_CRPOSSTAR;
1180 break;
1181
1182 case OP_CRPLUS:
1183 case OP_CRMINPLUS:
1184 *repeat_opcode = OP_CRPOSPLUS;
1185 break;
1186
1187 case OP_CRQUERY:
1188 case OP_CRMINQUERY:
1189 *repeat_opcode = OP_CRPOSQUERY;
1190 break;
1191
1192 case OP_CRRANGE:
1193 case OP_CRMINRANGE:
1194 *repeat_opcode = OP_CRPOSRANGE;
1195 break;
1196 }
1197 }
1198 }
1199 c = *code;
1200 }
1201
1202 switch(c)
1203 {
1204 case OP_END:
1205 return 0;
1206
1207 case OP_TYPESTAR:
1208 case OP_TYPEMINSTAR:
1209 case OP_TYPEPLUS:
1210 case OP_TYPEMINPLUS:
1211 case OP_TYPEQUERY:
1212 case OP_TYPEMINQUERY:
1213 case OP_TYPEPOSSTAR:
1214 case OP_TYPEPOSPLUS:
1215 case OP_TYPEPOSQUERY:
1216 if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
1217 break;
1218
1219 case OP_TYPEUPTO:
1220 case OP_TYPEMINUPTO:
1221 case OP_TYPEEXACT:
1222 case OP_TYPEPOSUPTO:
1223 if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
1224 code += 2;
1225 break;
1226
1227 case OP_CALLOUT_STR:
1228 code += GET(code, 1 + 2*LINK_SIZE);
1229 break;
1230
1231 #ifdef SUPPORT_WIDE_CHARS
1232 case OP_XCLASS:
1233 code += GET(code, 1);
1234 break;
1235 #endif
1236
1237 case OP_MARK:
1238 case OP_COMMIT_ARG:
1239 case OP_PRUNE_ARG:
1240 case OP_SKIP_ARG:
1241 case OP_THEN_ARG:
1242 code += code[1];
1243 break;
1244 }
1245
1246 /* Add in the fixed length from the table */
1247
1248 code += PRIV(OP_lengths)[c];
1249
1250 /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
1251 followed by a multi-byte character. The length in the table is a minimum, so
1252 we have to arrange to skip the extra code units. */
1253
1254 #ifdef MAYBE_UTF_MULTI
1255 if (utf) switch(c)
1256 {
1257 case OP_CHAR:
1258 case OP_CHARI:
1259 case OP_NOT:
1260 case OP_NOTI:
1261 case OP_STAR:
1262 case OP_MINSTAR:
1263 case OP_PLUS:
1264 case OP_MINPLUS:
1265 case OP_QUERY:
1266 case OP_MINQUERY:
1267 case OP_UPTO:
1268 case OP_MINUPTO:
1269 case OP_EXACT:
1270 case OP_POSSTAR:
1271 case OP_POSPLUS:
1272 case OP_POSQUERY:
1273 case OP_POSUPTO:
1274 case OP_STARI:
1275 case OP_MINSTARI:
1276 case OP_PLUSI:
1277 case OP_MINPLUSI:
1278 case OP_QUERYI:
1279 case OP_MINQUERYI:
1280 case OP_UPTOI:
1281 case OP_MINUPTOI:
1282 case OP_EXACTI:
1283 case OP_POSSTARI:
1284 case OP_POSPLUSI:
1285 case OP_POSQUERYI:
1286 case OP_POSUPTOI:
1287 case OP_NOTSTAR:
1288 case OP_NOTMINSTAR:
1289 case OP_NOTPLUS:
1290 case OP_NOTMINPLUS:
1291 case OP_NOTQUERY:
1292 case OP_NOTMINQUERY:
1293 case OP_NOTUPTO:
1294 case OP_NOTMINUPTO:
1295 case OP_NOTEXACT:
1296 case OP_NOTPOSSTAR:
1297 case OP_NOTPOSPLUS:
1298 case OP_NOTPOSQUERY:
1299 case OP_NOTPOSUPTO:
1300 case OP_NOTSTARI:
1301 case OP_NOTMINSTARI:
1302 case OP_NOTPLUSI:
1303 case OP_NOTMINPLUSI:
1304 case OP_NOTQUERYI:
1305 case OP_NOTMINQUERYI:
1306 case OP_NOTUPTOI:
1307 case OP_NOTMINUPTOI:
1308 case OP_NOTEXACTI:
1309 case OP_NOTPOSSTARI:
1310 case OP_NOTPOSPLUSI:
1311 case OP_NOTPOSQUERYI:
1312 case OP_NOTPOSUPTOI:
1313 if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
1314 break;
1315 }
1316 #else
1317 (void)(utf); /* Keep compiler happy by referencing function argument */
1318 #endif /* SUPPORT_WIDE_CHARS */
1319 }
1320 }
1321
1322 /* End of pcre2_auto_possess.c */
1323