• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10           New API code Copyright (c) 2016-2019 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 /* This module contains functions that scan a compiled pattern and change
42 repeats into possessive repeats where possible. */
43 
44 
45 #ifdef HAVE_CONFIG_H
46 #include "config.h"
47 #endif
48 
49 
50 #include "pcre2_internal.h"
51 
52 
53 /*************************************************
54 *        Tables for auto-possessification        *
55 *************************************************/
56 
57 /* This table is used to check whether auto-possessification is possible
58 between adjacent character-type opcodes. The left-hand (repeated) opcode is
59 used to select the row, and the right-hand opcode is used to select the column.
60 A value of 1 means that auto-possessification is OK. For example, the second
61 value in the first row means that \D+\d can be turned into \D++\d.
62 
63 The Unicode property types (\P and \p) have to be present to fill out the table
64 because of what their opcode values are, but the table values should always be
65 zero because property types are handled separately in the code. The last four
66 columns apply to items that cannot be repeated, so there is no need to have
67 rows for them. Note that OP_DIGIT etc. are generated only when PCRE_UCP is
68 *not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
69 
/* Table dimensions. Rows cover the opcodes FIRST_AUTOTAB_OP up to
LAST_AUTOTAB_LEFT_OP (items that can be the repeated, left-hand side); columns
cover FIRST_AUTOTAB_OP up to LAST_AUTOTAB_RIGHT_OP (items that may follow).
Both indexes are therefore formed as (opcode - FIRST_AUTOTAB_OP). */
70 #define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1)
71 #define APTCOLS (LAST_AUTOTAB_RIGHT_OP - FIRST_AUTOTAB_OP + 1)
72 
/* autoposstab[left][right] is 1 when the repeated left-hand item can be made
possessive in front of the right-hand item, and 0 otherwise. The \P and \p
rows and columns are all zero placeholders. */
73 static const uint8_t autoposstab[APTROWS][APTCOLS] = {
74 /* \D \d \S \s \W \w  . .+ \C \P \p \R \H \h \V \v \X \Z \z  $ $M */
75   { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \D */
76   { 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \d */
77   { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \S */
78   { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \s */
79   { 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \W */
80   { 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \w */
81   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* .  */
82   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* .+ */
83   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \C */
84   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },  /* \P */
85   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },  /* \p */
86   { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 },  /* \R */
87   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 },  /* \H */
88   { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0 },  /* \h */
89   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0 },  /* \V */
90   { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0 },  /* \v */
91   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }   /* \X */
92 };
93 
94 #ifdef SUPPORT_UNICODE
95 /* This table is used to check whether auto-possessification is possible
96 between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP). The
97 left-hand (repeated) opcode is used to select the row, and the right-hand
98 opcode is used to select the column. The values are as follows:
99 
100   0   Always return FALSE (never auto-possessify)
101   1   Character groups are distinct (possessify if both are OP_PROP)
102   2   Check character categories in the same group (general or particular)
103   3   TRUE if the two opcodes are not the same (PROP vs NOTPROP)
104 
105   4   Check left general category vs right particular category
106   5   Check right general category vs left particular category
107 
108   6   Left alphanum vs right general category
109   7   Left space vs right general category
110   8   Left word vs right general category
111 
112   9   Right alphanum vs left general category
113  10   Right space vs left general category
114  11   Right word vs left general category
115 
116  12   Left alphanum vs right particular category
117  13   Left space vs right particular category
118  14   Left word vs right particular category
119 
120  15   Right alphanum vs left particular category
121  16   Right space vs left particular category
122  17   Right word vs left particular category
123 */
124 
/* Indexed as propposstab[left property type][right property type], where the
property types are the PT_xxx values named on the rows and columns. Each entry
is one of the action codes 0-17 that compare_opcodes() switches on. */
125 static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = {
126 /* ANY LAMP GC  PC  SC ALNUM SPACE PXSPACE WORD CLIST UCNC */
127   { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   0 },  /* PT_ANY */
128   { 0,  3,  0,  0,  0,    3,    1,      1,   0,    0,   0 },  /* PT_LAMP */
129   { 0,  0,  2,  4,  0,    9,   10,     10,  11,    0,   0 },  /* PT_GC */
130   { 0,  0,  5,  2,  0,   15,   16,     16,  17,    0,   0 },  /* PT_PC */
131   { 0,  0,  0,  0,  2,    0,    0,      0,   0,    0,   0 },  /* PT_SC */
132   { 0,  3,  6, 12,  0,    3,    1,      1,   0,    0,   0 },  /* PT_ALNUM */
133   { 0,  1,  7, 13,  0,    1,    3,      3,   1,    0,   0 },  /* PT_SPACE */
134   { 0,  1,  7, 13,  0,    1,    3,      3,   1,    0,   0 },  /* PT_PXSPACE */
135   { 0,  0,  8, 14,  0,    0,    1,      1,   3,    0,   0 },  /* PT_WORD */
136   { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   0 },  /* PT_CLIST */
137   { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   3 }   /* PT_UCNC */
138 };
139 
140 /* This table is used to check whether auto-possessification is possible
141 between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP) when one
142 specifies a general category and the other specifies a particular category. The
143 row is selected by the general category and the column by the particular
144 category. The value is 1 if the particular category is not part of the general
145 category. */
146 
/* Indexed as catposstab[general category][particular category]: seven general
categories (rows C, L, M, N, P, S, Z) by thirty particular categories
(columns Cc .. Zs). A 0 marks a particular category that belongs to the row's
general category; a 1 marks one that does not. */
147 static const uint8_t catposstab[7][30] = {
148 /* Cc Cf Cn Co Cs Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc Pd Pe Pf Pi Po Ps Sc Sk Sm So Zl Zp Zs */
149   { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* C */
150   { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* L */
151   { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* M */
152   { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* N */
153   { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },  /* P */
154   { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1 },  /* S */
155   { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 }   /* Z */
156 };
157 
158 /* This table is used when checking ALNUM, (PX)SPACE, SPACE, and WORD against
159 a general or particular category. The properties in each row are those
160 that apply to the character set in question. Duplication means that a little
161 unnecessary work is done when checking, but this keeps things much simpler
162 because they can all use the same code. For more details see the comment where
163 this table is used.
164 
165 Note: SPACE and PXSPACE used to be different because Perl excluded VT from
166 "space", but from Perl 5.18 it's included, so both categories are treated the
167 same here. */
168 
/* One row per special property: row 0 is ALNUM, row 1 is SPACE/PXSPACE, row 2
is WORD. compare_opcodes() selects the row by subtracting 6, 9, 12 or 15 from
the propposstab action code. Entries [0], [1] and [2] are general categories
(ucp_X values); entry [3] is a particular category. */
169 static const uint8_t posspropstab[3][4] = {
170   { ucp_L, ucp_N, ucp_N, ucp_Nl },  /* ALNUM, 3rd and 4th values redundant */
171   { ucp_Z, ucp_Z, ucp_C, ucp_Cc },  /* SPACE and PXSPACE, 2nd value redundant */
172   { ucp_L, ucp_N, ucp_P, ucp_Po }   /* WORD */
173 };
174 #endif  /* SUPPORT_UNICODE */
175 
176 
177 
178 #ifdef SUPPORT_UNICODE
179 /*************************************************
180 *        Check a character and a property        *
181 *************************************************/
182 
183 /* This function is called by compare_opcodes() when a property item is
184 adjacent to a fixed character.
185 
186 Arguments:
187   c            the character
188   ptype        the property type
189   pdata        the data for the type
190   negated      TRUE if it's a negated property (\P or \p{^)
191 
192 Returns:       TRUE if auto-possessifying is OK
193 */
194 
195 static BOOL
check_char_prop(uint32_t c,unsigned int ptype,unsigned int pdata,BOOL negated)196 check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata,
197   BOOL negated)
198 {
199 const uint32_t *p;
200 const ucd_record *prop = GET_UCD(c);
201 
202 switch(ptype)
203   {
204   case PT_LAMP:
205   return (prop->chartype == ucp_Lu ||
206           prop->chartype == ucp_Ll ||
207           prop->chartype == ucp_Lt) == negated;
208 
209   case PT_GC:
210   return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
211 
212   case PT_PC:
213   return (pdata == prop->chartype) == negated;
214 
215   case PT_SC:
216   return (pdata == prop->script) == negated;
217 
218   /* These are specials */
219 
220   case PT_ALNUM:
221   return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
222           PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
223 
224   /* Perl space used to exclude VT, but from Perl 5.18 it is included, which
225   means that Perl space and POSIX space are now identical. PCRE was changed
226   at release 8.34. */
227 
228   case PT_SPACE:    /* Perl space */
229   case PT_PXSPACE:  /* POSIX space */
230   switch(c)
231     {
232     HSPACE_CASES:
233     VSPACE_CASES:
234     return negated;
235 
236     default:
237     return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;
238     }
239   break;  /* Control never reaches here */
240 
241   case PT_WORD:
242   return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
243           PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
244           c == CHAR_UNDERSCORE) == negated;
245 
246   case PT_CLIST:
247   p = PRIV(ucd_caseless_sets) + prop->caseset;
248   for (;;)
249     {
250     if (c < *p) return !negated;
251     if (c == *p++) return negated;
252     }
253   break;  /* Control never reaches here */
254   }
255 
256 return FALSE;
257 }
258 #endif  /* SUPPORT_UNICODE */
259 
260 
261 
262 /*************************************************
263 *        Base opcode of repeated opcodes         *
264 *************************************************/
265 
266 /* Returns the base opcode for repeated single character type opcodes. If the
267 opcode is not a repeated character type, it returns with the original value.
268 
269 Arguments:  c opcode
270 Returns:    base opcode for the type
271 */
272 
273 static PCRE2_UCHAR
get_repeat_base(PCRE2_UCHAR c)274 get_repeat_base(PCRE2_UCHAR c)
275 {
276 return (c > OP_TYPEPOSUPTO)? c :
277        (c >= OP_TYPESTAR)?   OP_TYPESTAR :
278        (c >= OP_NOTSTARI)?   OP_NOTSTARI :
279        (c >= OP_NOTSTAR)?    OP_NOTSTAR :
280        (c >= OP_STARI)?      OP_STARI :
281                              OP_STAR;
282 }
283 
284 
285 /*************************************************
286 *        Fill the character property list        *
287 *************************************************/
288 
289 /* Checks whether the code points to an opcode that can take part in auto-
290 possessification, and if so, fills a list with its properties.
291 
292 Arguments:
293   code        points to start of expression
294   utf         TRUE if in UTF mode
295   fcc         points to the case-flipping table
296   list        points to output list
297               list[0] will be filled with the opcode
298               list[1] will be non-zero if this opcode
299                 can match an empty character string
300               list[2..7] depends on the opcode
301 
302 Returns:      points to the start of the next opcode if *code is accepted
303               NULL if *code is not accepted
304 */
305 
306 static PCRE2_SPTR
get_chr_property_list(PCRE2_SPTR code,BOOL utf,const uint8_t * fcc,uint32_t * list)307 get_chr_property_list(PCRE2_SPTR code, BOOL utf, const uint8_t *fcc,
308   uint32_t *list)
309 {
310 PCRE2_UCHAR c = *code;
311 PCRE2_UCHAR base;
312 PCRE2_SPTR end;
313 uint32_t chr;
314 
315 #ifdef SUPPORT_UNICODE
316 uint32_t *clist_dest;
317 const uint32_t *clist_src;
318 #else
319 (void)utf;    /* Suppress "unused parameter" compiler warning */
320 #endif
321 
322 list[0] = c;
323 list[1] = FALSE;
324 code++;
325 
326 if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
327   {
328   base = get_repeat_base(c);
329   c -= (base - OP_STAR);
330 
331   if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
332     code += IMM2_SIZE;
333 
334   list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
335              c != OP_POSPLUS);
336 
337   switch(base)
338     {
339     case OP_STAR:
340     list[0] = OP_CHAR;
341     break;
342 
343     case OP_STARI:
344     list[0] = OP_CHARI;
345     break;
346 
347     case OP_NOTSTAR:
348     list[0] = OP_NOT;
349     break;
350 
351     case OP_NOTSTARI:
352     list[0] = OP_NOTI;
353     break;
354 
355     case OP_TYPESTAR:
356     list[0] = *code;
357     code++;
358     break;
359     }
360   c = list[0];
361   }
362 
363 switch(c)
364   {
365   case OP_NOT_DIGIT:
366   case OP_DIGIT:
367   case OP_NOT_WHITESPACE:
368   case OP_WHITESPACE:
369   case OP_NOT_WORDCHAR:
370   case OP_WORDCHAR:
371   case OP_ANY:
372   case OP_ALLANY:
373   case OP_ANYNL:
374   case OP_NOT_HSPACE:
375   case OP_HSPACE:
376   case OP_NOT_VSPACE:
377   case OP_VSPACE:
378   case OP_EXTUNI:
379   case OP_EODN:
380   case OP_EOD:
381   case OP_DOLL:
382   case OP_DOLLM:
383   return code;
384 
385   case OP_CHAR:
386   case OP_NOT:
387   GETCHARINCTEST(chr, code);
388   list[2] = chr;
389   list[3] = NOTACHAR;
390   return code;
391 
392   case OP_CHARI:
393   case OP_NOTI:
394   list[0] = (c == OP_CHARI) ? OP_CHAR : OP_NOT;
395   GETCHARINCTEST(chr, code);
396   list[2] = chr;
397 
398 #ifdef SUPPORT_UNICODE
399   if (chr < 128 || (chr < 256 && !utf))
400     list[3] = fcc[chr];
401   else
402     list[3] = UCD_OTHERCASE(chr);
403 #elif defined SUPPORT_WIDE_CHARS
404   list[3] = (chr < 256) ? fcc[chr] : chr;
405 #else
406   list[3] = fcc[chr];
407 #endif
408 
409   /* The othercase might be the same value. */
410 
411   if (chr == list[3])
412     list[3] = NOTACHAR;
413   else
414     list[4] = NOTACHAR;
415   return code;
416 
417 #ifdef SUPPORT_UNICODE
418   case OP_PROP:
419   case OP_NOTPROP:
420   if (code[0] != PT_CLIST)
421     {
422     list[2] = code[0];
423     list[3] = code[1];
424     return code + 2;
425     }
426 
427   /* Convert only if we have enough space. */
428 
429   clist_src = PRIV(ucd_caseless_sets) + code[1];
430   clist_dest = list + 2;
431   code += 2;
432 
433   do {
434      if (clist_dest >= list + 8)
435        {
436        /* Early return if there is not enough space. This should never
437        happen, since all clists are shorter than 5 character now. */
438        list[2] = code[0];
439        list[3] = code[1];
440        return code;
441        }
442      *clist_dest++ = *clist_src;
443      }
444   while(*clist_src++ != NOTACHAR);
445 
446   /* All characters are stored. The terminating NOTACHAR is copied from the
447   clist itself. */
448 
449   list[0] = (c == OP_PROP) ? OP_CHAR : OP_NOT;
450   return code;
451 #endif
452 
453   case OP_NCLASS:
454   case OP_CLASS:
455 #ifdef SUPPORT_WIDE_CHARS
456   case OP_XCLASS:
457   if (c == OP_XCLASS)
458     end = code + GET(code, 0) - 1;
459   else
460 #endif
461     end = code + 32 / sizeof(PCRE2_UCHAR);
462 
463   switch(*end)
464     {
465     case OP_CRSTAR:
466     case OP_CRMINSTAR:
467     case OP_CRQUERY:
468     case OP_CRMINQUERY:
469     case OP_CRPOSSTAR:
470     case OP_CRPOSQUERY:
471     list[1] = TRUE;
472     end++;
473     break;
474 
475     case OP_CRPLUS:
476     case OP_CRMINPLUS:
477     case OP_CRPOSPLUS:
478     end++;
479     break;
480 
481     case OP_CRRANGE:
482     case OP_CRMINRANGE:
483     case OP_CRPOSRANGE:
484     list[1] = (GET2(end, 1) == 0);
485     end += 1 + 2 * IMM2_SIZE;
486     break;
487     }
488   list[2] = (uint32_t)(end - code);
489   return end;
490   }
491 return NULL;    /* Opcode not accepted */
492 }
493 
494 
495 
496 /*************************************************
497 *    Scan further character sets for match       *
498 *************************************************/
499 
500 /* Checks whether the base and the current opcode have a common character, in
501 which case the base cannot be possessified.
502 
503 Arguments:
504   code        points to the byte code
505   utf         TRUE in UTF mode
506   cb          compile data block
507   base_list   the data list of the base opcode
508   base_end    the end of the base opcode
509   rec_limit   points to recursion depth counter
510 
511 Returns:      TRUE if the auto-possessification is possible
512 */
513 
514 static BOOL
compare_opcodes(PCRE2_SPTR code,BOOL utf,const compile_block * cb,const uint32_t * base_list,PCRE2_SPTR base_end,int * rec_limit)515 compare_opcodes(PCRE2_SPTR code, BOOL utf, const compile_block *cb,
516   const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit)
517 {
518 PCRE2_UCHAR c;
519 uint32_t list[8];
520 const uint32_t *chr_ptr;
521 const uint32_t *ochr_ptr;
522 const uint32_t *list_ptr;
523 PCRE2_SPTR next_code;
524 #ifdef SUPPORT_WIDE_CHARS
525 PCRE2_SPTR xclass_flags;
526 #endif
527 const uint8_t *class_bitset;
528 const uint8_t *set1, *set2, *set_end;
529 uint32_t chr;
530 BOOL accepted, invert_bits;
531 BOOL entered_a_group = FALSE;
532 
533 if (--(*rec_limit) <= 0) return FALSE;  /* Recursion has gone too deep */
534 
535 /* Note: the base_list[1] contains whether the current opcode has a greedy
536 (represented by a non-zero value) quantifier. This is different from
537 other character type lists, which store here that the character iterator
538 matches to an empty string (also represented by a non-zero value). */
539 
540 for(;;)
541   {
542   /* All operations move the code pointer forward.
543   Therefore infinite recursions are not possible. */
544 
545   c = *code;
546 
547   /* Skip over callouts */
548 
549   if (c == OP_CALLOUT)
550     {
551     code += PRIV(OP_lengths)[c];
552     continue;
553     }
554 
555   if (c == OP_CALLOUT_STR)
556     {
557     code += GET(code, 1 + 2*LINK_SIZE);
558     continue;
559     }
560 
561   /* At the end of a branch, skip to the end of the group. */
562 
563   if (c == OP_ALT)
564     {
565     do code += GET(code, 1); while (*code == OP_ALT);
566     c = *code;
567     }
568 
569   /* Inspect the next opcode. */
570 
571   switch(c)
572     {
573     /* We can always possessify a greedy iterator at the end of the pattern,
574     which is reached after skipping over the final OP_KET. A non-greedy
575     iterator must never be possessified. */
576 
577     case OP_END:
578     return base_list[1] != 0;
579 
580     /* When an iterator is at the end of certain kinds of group we can inspect
581     what follows the group by skipping over the closing ket. Note that this
582     does not apply to OP_KETRMAX or OP_KETRMIN because what follows any given
583     iteration is variable (could be another iteration or could be the next
584     item). As these two opcodes are not listed in the next switch, they will
585     end up as the next code to inspect, and return FALSE by virtue of being
586     unsupported. */
587 
588     case OP_KET:
589     case OP_KETRPOS:
590     /* The non-greedy case cannot be converted to a possessive form. */
591 
592     if (base_list[1] == 0) return FALSE;
593 
594     /* If the bracket is capturing it might be referenced by an OP_RECURSE
595     so its last iterator can never be possessified if the pattern contains
596     recursions. (This could be improved by keeping a list of group numbers that
597     are called by recursion.) */
598 
599     switch(*(code - GET(code, 1)))
600       {
601       case OP_CBRA:
602       case OP_SCBRA:
603       case OP_CBRAPOS:
604       case OP_SCBRAPOS:
605       if (cb->had_recurse) return FALSE;
606       break;
607 
608       /* A script run might have to backtrack if the iterated item can match
609       characters from more than one script. So give up unless repeating an
610       explicit character. */
611 
612       case OP_SCRIPT_RUN:
613       if (base_list[0] != OP_CHAR && base_list[0] != OP_CHARI)
614         return FALSE;
615       break;
616 
617       /* Atomic sub-patterns and assertions can always auto-possessify their
618       last iterator. However, if the group was entered as a result of checking
619       a previous iterator, this is not possible. */
620 
621       case OP_ASSERT:
622       case OP_ASSERT_NOT:
623       case OP_ASSERTBACK:
624       case OP_ASSERTBACK_NOT:
625       case OP_ONCE:
626       return !entered_a_group;
627       }
628 
629     /* Skip over the bracket and inspect what comes next. */
630 
631     code += PRIV(OP_lengths)[c];
632     continue;
633 
634     /* Handle cases where the next item is a group. */
635 
636     case OP_ONCE:
637     case OP_BRA:
638     case OP_CBRA:
639     next_code = code + GET(code, 1);
640     code += PRIV(OP_lengths)[c];
641 
642     /* Check each branch. We have to recurse a level for all but the last
643     branch. */
644 
645     while (*next_code == OP_ALT)
646       {
647       if (!compare_opcodes(code, utf, cb, base_list, base_end, rec_limit))
648         return FALSE;
649       code = next_code + 1 + LINK_SIZE;
650       next_code += GET(next_code, 1);
651       }
652 
653     entered_a_group = TRUE;
654     continue;
655 
656     case OP_BRAZERO:
657     case OP_BRAMINZERO:
658 
659     next_code = code + 1;
660     if (*next_code != OP_BRA && *next_code != OP_CBRA &&
661         *next_code != OP_ONCE) return FALSE;
662 
663     do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
664 
665     /* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
666 
667     next_code += 1 + LINK_SIZE;
668     if (!compare_opcodes(next_code, utf, cb, base_list, base_end, rec_limit))
669       return FALSE;
670 
671     code += PRIV(OP_lengths)[c];
672     continue;
673 
674     /* The next opcode does not need special handling; fall through and use it
675     to see if the base can be possessified. */
676 
677     default:
678     break;
679     }
680 
681   /* We now have the next appropriate opcode to compare with the base. Check
682   for a supported opcode, and load its properties. */
683 
684   code = get_chr_property_list(code, utf, cb->fcc, list);
685   if (code == NULL) return FALSE;    /* Unsupported */
686 
687   /* If either opcode is a small character list, set pointers for comparing
688   characters from that list with another list, or with a property. */
689 
690   if (base_list[0] == OP_CHAR)
691     {
692     chr_ptr = base_list + 2;
693     list_ptr = list;
694     }
695   else if (list[0] == OP_CHAR)
696     {
697     chr_ptr = list + 2;
698     list_ptr = base_list;
699     }
700 
701   /* Character bitsets can also be compared to certain opcodes. */
702 
703   else if (base_list[0] == OP_CLASS || list[0] == OP_CLASS
704 #if PCRE2_CODE_UNIT_WIDTH == 8
705       /* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. */
706       || (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS))
707 #endif
708       )
709     {
710 #if PCRE2_CODE_UNIT_WIDTH == 8
711     if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS))
712 #else
713     if (base_list[0] == OP_CLASS)
714 #endif
715       {
716       set1 = (uint8_t *)(base_end - base_list[2]);
717       list_ptr = list;
718       }
719     else
720       {
721       set1 = (uint8_t *)(code - list[2]);
722       list_ptr = base_list;
723       }
724 
725     invert_bits = FALSE;
726     switch(list_ptr[0])
727       {
728       case OP_CLASS:
729       case OP_NCLASS:
730       set2 = (uint8_t *)
731         ((list_ptr == list ? code : base_end) - list_ptr[2]);
732       break;
733 
734 #ifdef SUPPORT_WIDE_CHARS
735       case OP_XCLASS:
736       xclass_flags = (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE;
737       if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE;
738       if ((*xclass_flags & XCL_MAP) == 0)
739         {
740         /* No bits are set for characters < 256. */
741         if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0;
742         /* Might be an empty repeat. */
743         continue;
744         }
745       set2 = (uint8_t *)(xclass_flags + 1);
746       break;
747 #endif
748 
749       case OP_NOT_DIGIT:
750       invert_bits = TRUE;
751       /* Fall through */
752       case OP_DIGIT:
753       set2 = (uint8_t *)(cb->cbits + cbit_digit);
754       break;
755 
756       case OP_NOT_WHITESPACE:
757       invert_bits = TRUE;
758       /* Fall through */
759       case OP_WHITESPACE:
760       set2 = (uint8_t *)(cb->cbits + cbit_space);
761       break;
762 
763       case OP_NOT_WORDCHAR:
764       invert_bits = TRUE;
765       /* Fall through */
766       case OP_WORDCHAR:
767       set2 = (uint8_t *)(cb->cbits + cbit_word);
768       break;
769 
770       default:
771       return FALSE;
772       }
773 
774     /* Because the bit sets are unaligned bytes, we need to perform byte
775     comparison here. */
776 
777     set_end = set1 + 32;
778     if (invert_bits)
779       {
780       do
781         {
782         if ((*set1++ & ~(*set2++)) != 0) return FALSE;
783         }
784       while (set1 < set_end);
785       }
786     else
787       {
788       do
789         {
790         if ((*set1++ & *set2++) != 0) return FALSE;
791         }
792       while (set1 < set_end);
793       }
794 
795     if (list[1] == 0) return TRUE;
796     /* Might be an empty repeat. */
797     continue;
798     }
799 
800   /* Some property combinations also acceptable. Unicode property opcodes are
801   processed specially; the rest can be handled with a lookup table. */
802 
803   else
804     {
805     uint32_t leftop, rightop;
806 
807     leftop = base_list[0];
808     rightop = list[0];
809 
810 #ifdef SUPPORT_UNICODE
811     accepted = FALSE; /* Always set in non-unicode case. */
812     if (leftop == OP_PROP || leftop == OP_NOTPROP)
813       {
814       if (rightop == OP_EOD)
815         accepted = TRUE;
816       else if (rightop == OP_PROP || rightop == OP_NOTPROP)
817         {
818         int n;
819         const uint8_t *p;
820         BOOL same = leftop == rightop;
821         BOOL lisprop = leftop == OP_PROP;
822         BOOL risprop = rightop == OP_PROP;
823         BOOL bothprop = lisprop && risprop;
824 
825         /* There's a table that specifies how each combination is to be
826         processed:
827           0   Always return FALSE (never auto-possessify)
828           1   Character groups are distinct (possessify if both are OP_PROP)
829           2   Check character categories in the same group (general or particular)
830           3   Return TRUE if the two opcodes are not the same
831           ... see comments below
832         */
833 
834         n = propposstab[base_list[2]][list[2]];
835         switch(n)
836           {
837           case 0: break;
838           case 1: accepted = bothprop; break;
839           case 2: accepted = (base_list[3] == list[3]) != same; break;
840           case 3: accepted = !same; break;
841 
842           case 4:  /* Left general category, right particular category */
843           accepted = risprop && catposstab[base_list[3]][list[3]] == same;
844           break;
845 
846           case 5:  /* Right general category, left particular category */
847           accepted = lisprop && catposstab[list[3]][base_list[3]] == same;
848           break;
849 
850           /* This code is logically tricky. Think hard before fiddling with it.
851           The posspropstab table has four entries per row. Each row relates to
852           one of PCRE's special properties such as ALNUM or SPACE or WORD.
853           Only WORD actually needs all four entries, but using repeats for the
854           others means they can all use the same code below.
855 
856           The first two entries in each row are Unicode general categories, and
857           apply always, because all the characters they include are part of the
858           PCRE character set. The third and fourth entries are a general and a
859           particular category, respectively, that include one or more relevant
860           characters. One or the other is used, depending on whether the check
861           is for a general or a particular category. However, in both cases the
862           category contains more characters than the specials that are defined
863           for the property being tested against. Therefore, it cannot be used
864           in a NOTPROP case.
865 
866           Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po.
867           Underscore is covered by ucp_P or ucp_Po. */
868 
869           case 6:  /* Left alphanum vs right general category */
870           case 7:  /* Left space vs right general category */
871           case 8:  /* Left word vs right general category */
872           p = posspropstab[n-6];
873           accepted = risprop && lisprop ==
874             (list[3] != p[0] &&
875              list[3] != p[1] &&
876             (list[3] != p[2] || !lisprop));
877           break;
878 
879           case 9:   /* Right alphanum vs left general category */
880           case 10:  /* Right space vs left general category */
881           case 11:  /* Right word vs left general category */
882           p = posspropstab[n-9];
883           accepted = lisprop && risprop ==
884             (base_list[3] != p[0] &&
885              base_list[3] != p[1] &&
886             (base_list[3] != p[2] || !risprop));
887           break;
888 
889           case 12:  /* Left alphanum vs right particular category */
890           case 13:  /* Left space vs right particular category */
891           case 14:  /* Left word vs right particular category */
892           p = posspropstab[n-12];
893           accepted = risprop && lisprop ==
894             (catposstab[p[0]][list[3]] &&
895              catposstab[p[1]][list[3]] &&
896             (list[3] != p[3] || !lisprop));
897           break;
898 
899           case 15:  /* Right alphanum vs left particular category */
900           case 16:  /* Right space vs left particular category */
901           case 17:  /* Right word vs left particular category */
902           p = posspropstab[n-15];
903           accepted = lisprop && risprop ==
904             (catposstab[p[0]][base_list[3]] &&
905              catposstab[p[1]][base_list[3]] &&
906             (base_list[3] != p[3] || !risprop));
907           break;
908           }
909         }
910       }
911 
912     else
913 #endif  /* SUPPORT_UNICODE */
914 
915     accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
916            rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
917            autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
918 
919     if (!accepted) return FALSE;
920 
921     if (list[1] == 0) return TRUE;
922     /* Might be an empty repeat. */
923     continue;
924     }
925 
926   /* Control reaches here only if one of the items is a small character list.
927   All characters are checked against the other side. */
928 
929   do
930     {
931     chr = *chr_ptr;
932 
933     switch(list_ptr[0])
934       {
935       case OP_CHAR:
936       ochr_ptr = list_ptr + 2;
937       do
938         {
939         if (chr == *ochr_ptr) return FALSE;
940         ochr_ptr++;
941         }
942       while(*ochr_ptr != NOTACHAR);
943       break;
944 
945       case OP_NOT:
946       ochr_ptr = list_ptr + 2;
947       do
948         {
949         if (chr == *ochr_ptr)
950           break;
951         ochr_ptr++;
952         }
953       while(*ochr_ptr != NOTACHAR);
954       if (*ochr_ptr == NOTACHAR) return FALSE;   /* Not found */
955       break;
956 
957       /* Note that OP_DIGIT etc. are generated only when PCRE2_UCP is *not*
958       set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
959 
960       case OP_DIGIT:
961       if (chr < 256 && (cb->ctypes[chr] & ctype_digit) != 0) return FALSE;
962       break;
963 
964       case OP_NOT_DIGIT:
965       if (chr > 255 || (cb->ctypes[chr] & ctype_digit) == 0) return FALSE;
966       break;
967 
968       case OP_WHITESPACE:
969       if (chr < 256 && (cb->ctypes[chr] & ctype_space) != 0) return FALSE;
970       break;
971 
972       case OP_NOT_WHITESPACE:
973       if (chr > 255 || (cb->ctypes[chr] & ctype_space) == 0) return FALSE;
974       break;
975 
976       case OP_WORDCHAR:
977       if (chr < 255 && (cb->ctypes[chr] & ctype_word) != 0) return FALSE;
978       break;
979 
980       case OP_NOT_WORDCHAR:
981       if (chr > 255 || (cb->ctypes[chr] & ctype_word) == 0) return FALSE;
982       break;
983 
984       case OP_HSPACE:
985       switch(chr)
986         {
987         HSPACE_CASES: return FALSE;
988         default: break;
989         }
990       break;
991 
992       case OP_NOT_HSPACE:
993       switch(chr)
994         {
995         HSPACE_CASES: break;
996         default: return FALSE;
997         }
998       break;
999 
1000       case OP_ANYNL:
1001       case OP_VSPACE:
1002       switch(chr)
1003         {
1004         VSPACE_CASES: return FALSE;
1005         default: break;
1006         }
1007       break;
1008 
1009       case OP_NOT_VSPACE:
1010       switch(chr)
1011         {
1012         VSPACE_CASES: break;
1013         default: return FALSE;
1014         }
1015       break;
1016 
1017       case OP_DOLL:
1018       case OP_EODN:
1019       switch (chr)
1020         {
1021         case CHAR_CR:
1022         case CHAR_LF:
1023         case CHAR_VT:
1024         case CHAR_FF:
1025         case CHAR_NEL:
1026 #ifndef EBCDIC
1027         case 0x2028:
1028         case 0x2029:
1029 #endif  /* Not EBCDIC */
1030         return FALSE;
1031         }
1032       break;
1033 
1034       case OP_EOD:    /* Can always possessify before \z */
1035       break;
1036 
1037 #ifdef SUPPORT_UNICODE
1038       case OP_PROP:
1039       case OP_NOTPROP:
1040       if (!check_char_prop(chr, list_ptr[2], list_ptr[3],
1041             list_ptr[0] == OP_NOTPROP))
1042         return FALSE;
1043       break;
1044 #endif
1045 
1046       case OP_NCLASS:
1047       if (chr > 255) return FALSE;
1048       /* Fall through */
1049 
1050       case OP_CLASS:
1051       if (chr > 255) break;
1052       class_bitset = (uint8_t *)
1053         ((list_ptr == list ? code : base_end) - list_ptr[2]);
1054       if ((class_bitset[chr >> 3] & (1u << (chr & 7))) != 0) return FALSE;
1055       break;
1056 
1057 #ifdef SUPPORT_WIDE_CHARS
1058       case OP_XCLASS:
1059       if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) -
1060           list_ptr[2] + LINK_SIZE, utf)) return FALSE;
1061       break;
1062 #endif
1063 
1064       default:
1065       return FALSE;
1066       }
1067 
1068     chr_ptr++;
1069     }
1070   while(*chr_ptr != NOTACHAR);
1071 
1072   /* At least one character must be matched from this opcode. */
1073 
1074   if (list[1] == 0) return TRUE;
1075   }
1076 
1077 /* Control never reaches here. There used to be a fail-save return FALSE; here,
1078 but some compilers complain about an unreachable statement. */
1079 }
1080 
1081 
1082 
1083 /*************************************************
1084 *    Scan compiled regex for auto-possession     *
1085 *************************************************/
1086 
1087 /* Replaces single character iterations with their possessive alternatives
1088 if appropriate. This function modifies the compiled opcode! Hitting a
1089 non-existent opcode may indicate a bug in PCRE2, but it can also be caused if a
1090 bad UTF string was compiled with PCRE2_NO_UTF_CHECK. The rec_limit catches
1091 overly complicated or large patterns. In these cases, the check just stops,
1092 leaving the remainder of the pattern unpossessified.
1093 
1094 Arguments:
1095   code        points to start of the byte code
1096   utf         TRUE in UTF mode
1097   cb          compile data block
1098 
1099 Returns:      0 for success
1100               -1 if a non-existant opcode is encountered
1101 */
1102 
1103 int
PRIV(auto_possessify)1104 PRIV(auto_possessify)(PCRE2_UCHAR *code, BOOL utf, const compile_block *cb)
1105 {
1106 PCRE2_UCHAR c;
1107 PCRE2_SPTR end;
1108 PCRE2_UCHAR *repeat_opcode;
1109 uint32_t list[8];
1110 int rec_limit = 1000;  /* Was 10,000 but clang+ASAN uses a lot of stack. */
1111 
1112 for (;;)
1113   {
1114   c = *code;
1115 
1116   if (c >= OP_TABLE_LENGTH) return -1;   /* Something gone wrong */
1117 
1118   if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
1119     {
1120     c -= get_repeat_base(c) - OP_STAR;
1121     end = (c <= OP_MINUPTO) ?
1122       get_chr_property_list(code, utf, cb->fcc, list) : NULL;
1123     list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
1124 
1125     if (end != NULL && compare_opcodes(end, utf, cb, list, end, &rec_limit))
1126       {
1127       switch(c)
1128         {
1129         case OP_STAR:
1130         *code += OP_POSSTAR - OP_STAR;
1131         break;
1132 
1133         case OP_MINSTAR:
1134         *code += OP_POSSTAR - OP_MINSTAR;
1135         break;
1136 
1137         case OP_PLUS:
1138         *code += OP_POSPLUS - OP_PLUS;
1139         break;
1140 
1141         case OP_MINPLUS:
1142         *code += OP_POSPLUS - OP_MINPLUS;
1143         break;
1144 
1145         case OP_QUERY:
1146         *code += OP_POSQUERY - OP_QUERY;
1147         break;
1148 
1149         case OP_MINQUERY:
1150         *code += OP_POSQUERY - OP_MINQUERY;
1151         break;
1152 
1153         case OP_UPTO:
1154         *code += OP_POSUPTO - OP_UPTO;
1155         break;
1156 
1157         case OP_MINUPTO:
1158         *code += OP_POSUPTO - OP_MINUPTO;
1159         break;
1160         }
1161       }
1162     c = *code;
1163     }
1164   else if (c == OP_CLASS || c == OP_NCLASS || c == OP_XCLASS)
1165     {
1166 #ifdef SUPPORT_WIDE_CHARS
1167     if (c == OP_XCLASS)
1168       repeat_opcode = code + GET(code, 1);
1169     else
1170 #endif
1171       repeat_opcode = code + 1 + (32 / sizeof(PCRE2_UCHAR));
1172 
1173     c = *repeat_opcode;
1174     if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
1175       {
1176       /* end must not be NULL. */
1177       end = get_chr_property_list(code, utf, cb->fcc, list);
1178 
1179       list[1] = (c & 1) == 0;
1180 
1181       if (compare_opcodes(end, utf, cb, list, end, &rec_limit))
1182         {
1183         switch (c)
1184           {
1185           case OP_CRSTAR:
1186           case OP_CRMINSTAR:
1187           *repeat_opcode = OP_CRPOSSTAR;
1188           break;
1189 
1190           case OP_CRPLUS:
1191           case OP_CRMINPLUS:
1192           *repeat_opcode = OP_CRPOSPLUS;
1193           break;
1194 
1195           case OP_CRQUERY:
1196           case OP_CRMINQUERY:
1197           *repeat_opcode = OP_CRPOSQUERY;
1198           break;
1199 
1200           case OP_CRRANGE:
1201           case OP_CRMINRANGE:
1202           *repeat_opcode = OP_CRPOSRANGE;
1203           break;
1204           }
1205         }
1206       }
1207     c = *code;
1208     }
1209 
1210   switch(c)
1211     {
1212     case OP_END:
1213     return 0;
1214 
1215     case OP_TYPESTAR:
1216     case OP_TYPEMINSTAR:
1217     case OP_TYPEPLUS:
1218     case OP_TYPEMINPLUS:
1219     case OP_TYPEQUERY:
1220     case OP_TYPEMINQUERY:
1221     case OP_TYPEPOSSTAR:
1222     case OP_TYPEPOSPLUS:
1223     case OP_TYPEPOSQUERY:
1224     if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
1225     break;
1226 
1227     case OP_TYPEUPTO:
1228     case OP_TYPEMINUPTO:
1229     case OP_TYPEEXACT:
1230     case OP_TYPEPOSUPTO:
1231     if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
1232       code += 2;
1233     break;
1234 
1235     case OP_CALLOUT_STR:
1236     code += GET(code, 1 + 2*LINK_SIZE);
1237     break;
1238 
1239 #ifdef SUPPORT_WIDE_CHARS
1240     case OP_XCLASS:
1241     code += GET(code, 1);
1242     break;
1243 #endif
1244 
1245     case OP_MARK:
1246     case OP_COMMIT_ARG:
1247     case OP_PRUNE_ARG:
1248     case OP_SKIP_ARG:
1249     case OP_THEN_ARG:
1250     code += code[1];
1251     break;
1252     }
1253 
1254   /* Add in the fixed length from the table */
1255 
1256   code += PRIV(OP_lengths)[c];
1257 
1258   /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
1259   followed by a multi-byte character. The length in the table is a minimum, so
1260   we have to arrange to skip the extra code units. */
1261 
1262 #ifdef MAYBE_UTF_MULTI
1263   if (utf) switch(c)
1264     {
1265     case OP_CHAR:
1266     case OP_CHARI:
1267     case OP_NOT:
1268     case OP_NOTI:
1269     case OP_STAR:
1270     case OP_MINSTAR:
1271     case OP_PLUS:
1272     case OP_MINPLUS:
1273     case OP_QUERY:
1274     case OP_MINQUERY:
1275     case OP_UPTO:
1276     case OP_MINUPTO:
1277     case OP_EXACT:
1278     case OP_POSSTAR:
1279     case OP_POSPLUS:
1280     case OP_POSQUERY:
1281     case OP_POSUPTO:
1282     case OP_STARI:
1283     case OP_MINSTARI:
1284     case OP_PLUSI:
1285     case OP_MINPLUSI:
1286     case OP_QUERYI:
1287     case OP_MINQUERYI:
1288     case OP_UPTOI:
1289     case OP_MINUPTOI:
1290     case OP_EXACTI:
1291     case OP_POSSTARI:
1292     case OP_POSPLUSI:
1293     case OP_POSQUERYI:
1294     case OP_POSUPTOI:
1295     case OP_NOTSTAR:
1296     case OP_NOTMINSTAR:
1297     case OP_NOTPLUS:
1298     case OP_NOTMINPLUS:
1299     case OP_NOTQUERY:
1300     case OP_NOTMINQUERY:
1301     case OP_NOTUPTO:
1302     case OP_NOTMINUPTO:
1303     case OP_NOTEXACT:
1304     case OP_NOTPOSSTAR:
1305     case OP_NOTPOSPLUS:
1306     case OP_NOTPOSQUERY:
1307     case OP_NOTPOSUPTO:
1308     case OP_NOTSTARI:
1309     case OP_NOTMINSTARI:
1310     case OP_NOTPLUSI:
1311     case OP_NOTMINPLUSI:
1312     case OP_NOTQUERYI:
1313     case OP_NOTMINQUERYI:
1314     case OP_NOTUPTOI:
1315     case OP_NOTMINUPTOI:
1316     case OP_NOTEXACTI:
1317     case OP_NOTPOSSTARI:
1318     case OP_NOTPOSPLUSI:
1319     case OP_NOTPOSQUERYI:
1320     case OP_NOTPOSUPTOI:
1321     if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
1322     break;
1323     }
1324 #else
1325   (void)(utf);  /* Keep compiler happy by referencing function argument */
1326 #endif  /* SUPPORT_WIDE_CHARS */
1327   }
1328 }
1329 
1330 /* End of pcre2_auto_possess.c */
1331