• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10           New API code Copyright (c) 2016-2020 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 /* This module contains functions that scan a compiled pattern and change
42 repeats into possessive repeats where possible. */
43 
44 
45 #ifdef HAVE_CONFIG_H
46 #include "config.h"
47 #endif
48 
49 
50 #include "pcre2_internal.h"
51 
52 
53 /*************************************************
54 *        Tables for auto-possessification        *
55 *************************************************/
56 
57 /* This table is used to check whether auto-possessification is possible
58 between adjacent character-type opcodes. The left-hand (repeated) opcode is
59 used to select the row, and the right-hand opcode is used to select the column.
60 A value of 1 means that auto-possessification is OK. For example, the second
61 value in the first row means that \D+\d can be turned into \D++\d.
62 
63 The Unicode property types (\P and \p) have to be present to fill out the table
64 because of what their opcode values are, but the table values should always be
65 zero because property types are handled separately in the code. The last four
66 columns apply to items that cannot be repeated, so there is no need to have
67 rows for them. Note that OP_DIGIT etc. are generated only when PCRE_UCP is
68 *not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
69 
/* Table dimensions: one row for each opcode from FIRST_AUTOTAB_OP up to and
including LAST_AUTOTAB_LEFT_OP, and one column for each opcode from
FIRST_AUTOTAB_OP up to and including LAST_AUTOTAB_RIGHT_OP. */
70 #define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1)
71 #define APTCOLS (LAST_AUTOTAB_RIGHT_OP - FIRST_AUTOTAB_OP + 1)
72 
/* Looked up in compare_opcodes() as
autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP], after both
opcodes have been checked to lie inside the ranges given above. The row and
column labels below name the pattern item that each opcode represents. */
73 static const uint8_t autoposstab[APTROWS][APTCOLS] = {
74 /* \D \d \S \s \W \w  . .+ \C \P \p \R \H \h \V \v \X \Z \z  $ $M */
75   { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \D */
76   { 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \d */
77   { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \S */
78   { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \s */
79   { 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \W */
80   { 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \w */
81   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* .  */
82   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* .+ */
83   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \C */
84   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },  /* \P */
85   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },  /* \p */
86   { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 },  /* \R */
87   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 },  /* \H */
88   { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0 },  /* \h */
89   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0 },  /* \V */
90   { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0 },  /* \v */
91   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }   /* \X */
92 };
93 
94 #ifdef SUPPORT_UNICODE
95 /* This table is used to check whether auto-possessification is possible
96 between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP). The
97 left-hand (repeated) opcode is used to select the row, and the right-hand
98 opcode is used to select the column. The values are as follows:
99 
100   0   Always return FALSE (never auto-possessify)
101   1   Character groups are distinct (possessify if both are OP_PROP)
102   2   Check character categories in the same group (general or particular)
103   3   TRUE if the two opcodes are not the same (PROP vs NOTPROP)
104 
105   4   Check left general category vs right particular category
106   5   Check right general category vs left particular category
107 
108   6   Left alphanum vs right general category
109   7   Left space vs right general category
110   8   Left word vs right general category
111 
112   9   Right alphanum vs left general category
113  10   Right space vs left general category
114  11   Right word vs left general category
115 
116  12   Left alphanum vs right particular category
117  13   Left space vs right particular category
118  14   Left word vs right particular category
119 
120  15   Right alphanum vs left particular category
121  16   Right space vs left particular category
122  17   Right word vs left particular category
123 */
124 
/* Looked up in compare_opcodes() as propposstab[base_list[2]][list[2]], that
is, [property type of the left-hand item][property type of the right-hand
item]. The value found selects which of the numbered checks is applied. */
125 static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = {
126 /* ANY LAMP GC  PC  SC ALNUM SPACE PXSPACE WORD CLIST UCNC */
127   { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   0 },  /* PT_ANY */
128   { 0,  3,  0,  0,  0,    3,    1,      1,   0,    0,   0 },  /* PT_LAMP */
129   { 0,  0,  2,  4,  0,    9,   10,     10,  11,    0,   0 },  /* PT_GC */
130   { 0,  0,  5,  2,  0,   15,   16,     16,  17,    0,   0 },  /* PT_PC */
131   { 0,  0,  0,  0,  2,    0,    0,      0,   0,    0,   0 },  /* PT_SC */
132   { 0,  3,  6, 12,  0,    3,    1,      1,   0,    0,   0 },  /* PT_ALNUM */
133   { 0,  1,  7, 13,  0,    1,    3,      3,   1,    0,   0 },  /* PT_SPACE */
134   { 0,  1,  7, 13,  0,    1,    3,      3,   1,    0,   0 },  /* PT_PXSPACE */
135   { 0,  0,  8, 14,  0,    0,    1,      1,   3,    0,   0 },  /* PT_WORD */
136   { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   0 },  /* PT_CLIST */
137   { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   3 }   /* PT_UCNC */
138 };
139 
140 /* This table is used to check whether auto-possessification is possible
141 between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP) when one
142 specifies a general category and the other specifies a particular category. The
143 row is selected by the general category and the column by the particular
144 category. The value is 1 if the particular category is not part of the general
145 category. */
146 
/* Looked up in compare_opcodes() as catposstab[general][particular]. The 7
rows are the general categories C, L, M, N, P, S and Z; the 30 columns are the
particular categories named in the heading line. A zero therefore marks a
particular category that belongs to the row's general category. */
147 static const uint8_t catposstab[7][30] = {
148 /* Cc Cf Cn Co Cs Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc Pd Pe Pf Pi Po Ps Sc Sk Sm So Zl Zp Zs */
149   { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* C */
150   { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* L */
151   { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* M */
152   { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* N */
153   { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },  /* P */
154   { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1 },  /* S */
155   { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 }   /* Z */
156 };
157 
158 /* This table is used when checking ALNUM, (PX)SPACE, SPACE, and WORD against
159 a general or particular category. The properties in each row are those
160 that apply to the character set in question. Duplication means that a little
161 unnecessary work is done when checking, but this keeps things much simpler
162 because they can all use the same code. For more details see the comment where
163 this table is used.
164 
165 Note: SPACE and PXSPACE used to be different because Perl excluded VT from
166 "space", but from Perl 5.18 it's included, so both categories are treated the
167 same here. */
168 
/* A row is selected in compare_opcodes() by subtracting 6, 9, 12 or 15 from
the propposstab value, which gives 0 for ALNUM, 1 for SPACE/PXSPACE and 2 for
WORD. Entries [0] and [1] are general categories that are always checked;
entry [2] is consulted for general-category comparisons and entry [3] for
particular-category comparisons. */
169 static const uint8_t posspropstab[3][4] = {
170   { ucp_L, ucp_N, ucp_N, ucp_Nl },  /* ALNUM, 3rd and 4th values redundant */
171   { ucp_Z, ucp_Z, ucp_C, ucp_Cc },  /* SPACE and PXSPACE, 2nd value redundant */
172   { ucp_L, ucp_N, ucp_P, ucp_Po }   /* WORD */
173 };
174 #endif  /* SUPPORT_UNICODE */
175 
176 
177 
178 #ifdef SUPPORT_UNICODE
179 /*************************************************
180 *        Check a character and a property        *
181 *************************************************/
182 
183 /* This function is called by compare_opcodes() when a property item is
184 adjacent to a fixed character.
185 
186 Arguments:
187   c            the character
188   ptype        the property type
189   pdata        the data for the type
190   negated      TRUE if it's a negated property (\P or \p{^)
191 
192 Returns:       TRUE if auto-possessifying is OK
193 */
194 
195 static BOOL
check_char_prop(uint32_t c,unsigned int ptype,unsigned int pdata,BOOL negated)196 check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata,
197   BOOL negated)
198 {
199 const uint32_t *p;
200 const ucd_record *prop = GET_UCD(c);
201 
202 switch(ptype)
203   {
204   case PT_LAMP:
205   return (prop->chartype == ucp_Lu ||
206           prop->chartype == ucp_Ll ||
207           prop->chartype == ucp_Lt) == negated;
208 
209   case PT_GC:
210   return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
211 
212   case PT_PC:
213   return (pdata == prop->chartype) == negated;
214 
215   case PT_SC:
216   return (pdata == prop->script) == negated;
217 
218   /* These are specials */
219 
220   case PT_ALNUM:
221   return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
222           PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
223 
224   /* Perl space used to exclude VT, but from Perl 5.18 it is included, which
225   means that Perl space and POSIX space are now identical. PCRE was changed
226   at release 8.34. */
227 
228   case PT_SPACE:    /* Perl space */
229   case PT_PXSPACE:  /* POSIX space */
230   switch(c)
231     {
232     HSPACE_CASES:
233     VSPACE_CASES:
234     return negated;
235 
236     default:
237     return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;
238     }
239   break;  /* Control never reaches here */
240 
241   case PT_WORD:
242   return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
243           PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
244           c == CHAR_UNDERSCORE) == negated;
245 
246   case PT_CLIST:
247   p = PRIV(ucd_caseless_sets) + prop->caseset;
248   for (;;)
249     {
250     if (c < *p) return !negated;
251     if (c == *p++) return negated;
252     }
253   break;  /* Control never reaches here */
254   }
255 
256 return FALSE;
257 }
258 #endif  /* SUPPORT_UNICODE */
259 
260 
261 
262 /*************************************************
263 *        Base opcode of repeated opcodes         *
264 *************************************************/
265 
266 /* Returns the base opcode for repeated single character type opcodes. If the
267 opcode is not a repeated character type, it returns with the original value.
268 
269 Arguments:  c opcode
270 Returns:    base opcode for the type
271 */
272 
273 static PCRE2_UCHAR
get_repeat_base(PCRE2_UCHAR c)274 get_repeat_base(PCRE2_UCHAR c)
275 {
276 return (c > OP_TYPEPOSUPTO)? c :
277        (c >= OP_TYPESTAR)?   OP_TYPESTAR :
278        (c >= OP_NOTSTARI)?   OP_NOTSTARI :
279        (c >= OP_NOTSTAR)?    OP_NOTSTAR :
280        (c >= OP_STARI)?      OP_STARI :
281                              OP_STAR;
282 }
283 
284 
285 /*************************************************
286 *        Fill the character property list        *
287 *************************************************/
288 
289 /* Checks whether the code points to an opcode that can take part in auto-
290 possessification, and if so, fills a list with its properties.
291 
292 Arguments:
293   code        points to start of expression
294   utf         TRUE if in UTF mode
295   ucp         TRUE if in UCP mode
296   fcc         points to the case-flipping table
297   list        points to output list
298               list[0] will be filled with the opcode
299               list[1] will be non-zero if this opcode
300                 can match an empty character string
301               list[2..7] depends on the opcode
302 
303 Returns:      points to the start of the next opcode if *code is accepted
304               NULL if *code is not accepted
305 */
306 
307 static PCRE2_SPTR
get_chr_property_list(PCRE2_SPTR code,BOOL utf,BOOL ucp,const uint8_t * fcc,uint32_t * list)308 get_chr_property_list(PCRE2_SPTR code, BOOL utf, BOOL ucp, const uint8_t *fcc,
309   uint32_t *list)
310 {
311 PCRE2_UCHAR c = *code;
312 PCRE2_UCHAR base;
313 PCRE2_SPTR end;
314 uint32_t chr;
315 
316 #ifdef SUPPORT_UNICODE
317 uint32_t *clist_dest;
318 const uint32_t *clist_src;
319 #else
320 (void)utf;    /* Suppress "unused parameter" compiler warnings */
321 (void)ucp;
322 #endif
323 
324 list[0] = c;
325 list[1] = FALSE;
326 code++;
327 
328 if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
329   {
330   base = get_repeat_base(c);
331   c -= (base - OP_STAR);
332 
333   if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
334     code += IMM2_SIZE;
335 
336   list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
337              c != OP_POSPLUS);
338 
339   switch(base)
340     {
341     case OP_STAR:
342     list[0] = OP_CHAR;
343     break;
344 
345     case OP_STARI:
346     list[0] = OP_CHARI;
347     break;
348 
349     case OP_NOTSTAR:
350     list[0] = OP_NOT;
351     break;
352 
353     case OP_NOTSTARI:
354     list[0] = OP_NOTI;
355     break;
356 
357     case OP_TYPESTAR:
358     list[0] = *code;
359     code++;
360     break;
361     }
362   c = list[0];
363   }
364 
365 switch(c)
366   {
367   case OP_NOT_DIGIT:
368   case OP_DIGIT:
369   case OP_NOT_WHITESPACE:
370   case OP_WHITESPACE:
371   case OP_NOT_WORDCHAR:
372   case OP_WORDCHAR:
373   case OP_ANY:
374   case OP_ALLANY:
375   case OP_ANYNL:
376   case OP_NOT_HSPACE:
377   case OP_HSPACE:
378   case OP_NOT_VSPACE:
379   case OP_VSPACE:
380   case OP_EXTUNI:
381   case OP_EODN:
382   case OP_EOD:
383   case OP_DOLL:
384   case OP_DOLLM:
385   return code;
386 
387   case OP_CHAR:
388   case OP_NOT:
389   GETCHARINCTEST(chr, code);
390   list[2] = chr;
391   list[3] = NOTACHAR;
392   return code;
393 
394   case OP_CHARI:
395   case OP_NOTI:
396   list[0] = (c == OP_CHARI) ? OP_CHAR : OP_NOT;
397   GETCHARINCTEST(chr, code);
398   list[2] = chr;
399 
400 #ifdef SUPPORT_UNICODE
401   if (chr < 128 || (chr < 256 && !utf && !ucp))
402     list[3] = fcc[chr];
403   else
404     list[3] = UCD_OTHERCASE(chr);
405 #elif defined SUPPORT_WIDE_CHARS
406   list[3] = (chr < 256) ? fcc[chr] : chr;
407 #else
408   list[3] = fcc[chr];
409 #endif
410 
411   /* The othercase might be the same value. */
412 
413   if (chr == list[3])
414     list[3] = NOTACHAR;
415   else
416     list[4] = NOTACHAR;
417   return code;
418 
419 #ifdef SUPPORT_UNICODE
420   case OP_PROP:
421   case OP_NOTPROP:
422   if (code[0] != PT_CLIST)
423     {
424     list[2] = code[0];
425     list[3] = code[1];
426     return code + 2;
427     }
428 
429   /* Convert only if we have enough space. */
430 
431   clist_src = PRIV(ucd_caseless_sets) + code[1];
432   clist_dest = list + 2;
433   code += 2;
434 
435   do {
436      if (clist_dest >= list + 8)
437        {
438        /* Early return if there is not enough space. This should never
439        happen, since all clists are shorter than 5 character now. */
440        list[2] = code[0];
441        list[3] = code[1];
442        return code;
443        }
444      *clist_dest++ = *clist_src;
445      }
446   while(*clist_src++ != NOTACHAR);
447 
448   /* All characters are stored. The terminating NOTACHAR is copied from the
449   clist itself. */
450 
451   list[0] = (c == OP_PROP) ? OP_CHAR : OP_NOT;
452   return code;
453 #endif
454 
455   case OP_NCLASS:
456   case OP_CLASS:
457 #ifdef SUPPORT_WIDE_CHARS
458   case OP_XCLASS:
459   if (c == OP_XCLASS)
460     end = code + GET(code, 0) - 1;
461   else
462 #endif
463     end = code + 32 / sizeof(PCRE2_UCHAR);
464 
465   switch(*end)
466     {
467     case OP_CRSTAR:
468     case OP_CRMINSTAR:
469     case OP_CRQUERY:
470     case OP_CRMINQUERY:
471     case OP_CRPOSSTAR:
472     case OP_CRPOSQUERY:
473     list[1] = TRUE;
474     end++;
475     break;
476 
477     case OP_CRPLUS:
478     case OP_CRMINPLUS:
479     case OP_CRPOSPLUS:
480     end++;
481     break;
482 
483     case OP_CRRANGE:
484     case OP_CRMINRANGE:
485     case OP_CRPOSRANGE:
486     list[1] = (GET2(end, 1) == 0);
487     end += 1 + 2 * IMM2_SIZE;
488     break;
489     }
490   list[2] = (uint32_t)(end - code);
491   return end;
492   }
493 return NULL;    /* Opcode not accepted */
494 }
495 
496 
497 
498 /*************************************************
499 *    Scan further character sets for match       *
500 *************************************************/
501 
502 /* Checks whether the base and the current opcode have a common character, in
503 which case the base cannot be possessified.
504 
505 Arguments:
506   code        points to the byte code
507   utf         TRUE in UTF mode
508   ucp         TRUE in UCP mode
509   cb          compile data block
510   base_list   the data list of the base opcode
511   base_end    the end of the base opcode
512   rec_limit   points to recursion depth counter
513 
514 Returns:      TRUE if the auto-possessification is possible
515 */
516 
517 static BOOL
compare_opcodes(PCRE2_SPTR code,BOOL utf,BOOL ucp,const compile_block * cb,const uint32_t * base_list,PCRE2_SPTR base_end,int * rec_limit)518 compare_opcodes(PCRE2_SPTR code, BOOL utf, BOOL ucp, const compile_block *cb,
519   const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit)
520 {
521 PCRE2_UCHAR c;
522 uint32_t list[8];
523 const uint32_t *chr_ptr;
524 const uint32_t *ochr_ptr;
525 const uint32_t *list_ptr;
526 PCRE2_SPTR next_code;
527 #ifdef SUPPORT_WIDE_CHARS
528 PCRE2_SPTR xclass_flags;
529 #endif
530 const uint8_t *class_bitset;
531 const uint8_t *set1, *set2, *set_end;
532 uint32_t chr;
533 BOOL accepted, invert_bits;
534 BOOL entered_a_group = FALSE;
535 
536 if (--(*rec_limit) <= 0) return FALSE;  /* Recursion has gone too deep */
537 
538 /* Note: the base_list[1] contains whether the current opcode has a greedy
539 (represented by a non-zero value) quantifier. This is different from
540 other character type lists, which store here that the character iterator
541 matches to an empty string (also represented by a non-zero value). */
542 
543 for(;;)
544   {
545   /* All operations move the code pointer forward.
546   Therefore infinite recursions are not possible. */
547 
548   c = *code;
549 
550   /* Skip over callouts */
551 
552   if (c == OP_CALLOUT)
553     {
554     code += PRIV(OP_lengths)[c];
555     continue;
556     }
557 
558   if (c == OP_CALLOUT_STR)
559     {
560     code += GET(code, 1 + 2*LINK_SIZE);
561     continue;
562     }
563 
564   /* At the end of a branch, skip to the end of the group. */
565 
566   if (c == OP_ALT)
567     {
568     do code += GET(code, 1); while (*code == OP_ALT);
569     c = *code;
570     }
571 
572   /* Inspect the next opcode. */
573 
574   switch(c)
575     {
576     /* We can always possessify a greedy iterator at the end of the pattern,
577     which is reached after skipping over the final OP_KET. A non-greedy
578     iterator must never be possessified. */
579 
580     case OP_END:
581     return base_list[1] != 0;
582 
583     /* When an iterator is at the end of certain kinds of group we can inspect
584     what follows the group by skipping over the closing ket. Note that this
585     does not apply to OP_KETRMAX or OP_KETRMIN because what follows any given
586     iteration is variable (could be another iteration or could be the next
587     item). As these two opcodes are not listed in the next switch, they will
588     end up as the next code to inspect, and return FALSE by virtue of being
589     unsupported. */
590 
591     case OP_KET:
592     case OP_KETRPOS:
593     /* The non-greedy case cannot be converted to a possessive form. */
594 
595     if (base_list[1] == 0) return FALSE;
596 
597     /* If the bracket is capturing it might be referenced by an OP_RECURSE
598     so its last iterator can never be possessified if the pattern contains
599     recursions. (This could be improved by keeping a list of group numbers that
600     are called by recursion.) */
601 
602     switch(*(code - GET(code, 1)))
603       {
604       case OP_CBRA:
605       case OP_SCBRA:
606       case OP_CBRAPOS:
607       case OP_SCBRAPOS:
608       if (cb->had_recurse) return FALSE;
609       break;
610 
611       /* A script run might have to backtrack if the iterated item can match
612       characters from more than one script. So give up unless repeating an
613       explicit character. */
614 
615       case OP_SCRIPT_RUN:
616       if (base_list[0] != OP_CHAR && base_list[0] != OP_CHARI)
617         return FALSE;
618       break;
619 
620       /* Atomic sub-patterns and assertions can always auto-possessify their
621       last iterator. However, if the group was entered as a result of checking
622       a previous iterator, this is not possible. */
623 
624       case OP_ASSERT:
625       case OP_ASSERT_NOT:
626       case OP_ASSERTBACK:
627       case OP_ASSERTBACK_NOT:
628       case OP_ONCE:
629       return !entered_a_group;
630 
631       /* Non-atomic assertions - don't possessify last iterator. This needs
632       more thought. */
633 
634       case OP_ASSERT_NA:
635       case OP_ASSERTBACK_NA:
636       return FALSE;
637       }
638 
639     /* Skip over the bracket and inspect what comes next. */
640 
641     code += PRIV(OP_lengths)[c];
642     continue;
643 
644     /* Handle cases where the next item is a group. */
645 
646     case OP_ONCE:
647     case OP_BRA:
648     case OP_CBRA:
649     next_code = code + GET(code, 1);
650     code += PRIV(OP_lengths)[c];
651 
652     /* Check each branch. We have to recurse a level for all but the last
653     branch. */
654 
655     while (*next_code == OP_ALT)
656       {
657       if (!compare_opcodes(code, utf, ucp, cb, base_list, base_end, rec_limit))
658         return FALSE;
659       code = next_code + 1 + LINK_SIZE;
660       next_code += GET(next_code, 1);
661       }
662 
663     entered_a_group = TRUE;
664     continue;
665 
666     case OP_BRAZERO:
667     case OP_BRAMINZERO:
668 
669     next_code = code + 1;
670     if (*next_code != OP_BRA && *next_code != OP_CBRA &&
671         *next_code != OP_ONCE) return FALSE;
672 
673     do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
674 
675     /* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
676 
677     next_code += 1 + LINK_SIZE;
678     if (!compare_opcodes(next_code, utf, ucp, cb, base_list, base_end,
679          rec_limit))
680       return FALSE;
681 
682     code += PRIV(OP_lengths)[c];
683     continue;
684 
685     /* The next opcode does not need special handling; fall through and use it
686     to see if the base can be possessified. */
687 
688     default:
689     break;
690     }
691 
692   /* We now have the next appropriate opcode to compare with the base. Check
693   for a supported opcode, and load its properties. */
694 
695   code = get_chr_property_list(code, utf, ucp, cb->fcc, list);
696   if (code == NULL) return FALSE;    /* Unsupported */
697 
698   /* If either opcode is a small character list, set pointers for comparing
699   characters from that list with another list, or with a property. */
700 
701   if (base_list[0] == OP_CHAR)
702     {
703     chr_ptr = base_list + 2;
704     list_ptr = list;
705     }
706   else if (list[0] == OP_CHAR)
707     {
708     chr_ptr = list + 2;
709     list_ptr = base_list;
710     }
711 
712   /* Character bitsets can also be compared to certain opcodes. */
713 
714   else if (base_list[0] == OP_CLASS || list[0] == OP_CLASS
715 #if PCRE2_CODE_UNIT_WIDTH == 8
716       /* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. */
717       || (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS))
718 #endif
719       )
720     {
721 #if PCRE2_CODE_UNIT_WIDTH == 8
722     if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS))
723 #else
724     if (base_list[0] == OP_CLASS)
725 #endif
726       {
727       set1 = (uint8_t *)(base_end - base_list[2]);
728       list_ptr = list;
729       }
730     else
731       {
732       set1 = (uint8_t *)(code - list[2]);
733       list_ptr = base_list;
734       }
735 
736     invert_bits = FALSE;
737     switch(list_ptr[0])
738       {
739       case OP_CLASS:
740       case OP_NCLASS:
741       set2 = (uint8_t *)
742         ((list_ptr == list ? code : base_end) - list_ptr[2]);
743       break;
744 
745 #ifdef SUPPORT_WIDE_CHARS
746       case OP_XCLASS:
747       xclass_flags = (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE;
748       if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE;
749       if ((*xclass_flags & XCL_MAP) == 0)
750         {
751         /* No bits are set for characters < 256. */
752         if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0;
753         /* Might be an empty repeat. */
754         continue;
755         }
756       set2 = (uint8_t *)(xclass_flags + 1);
757       break;
758 #endif
759 
760       case OP_NOT_DIGIT:
761       invert_bits = TRUE;
762       /* Fall through */
763       case OP_DIGIT:
764       set2 = (uint8_t *)(cb->cbits + cbit_digit);
765       break;
766 
767       case OP_NOT_WHITESPACE:
768       invert_bits = TRUE;
769       /* Fall through */
770       case OP_WHITESPACE:
771       set2 = (uint8_t *)(cb->cbits + cbit_space);
772       break;
773 
774       case OP_NOT_WORDCHAR:
775       invert_bits = TRUE;
776       /* Fall through */
777       case OP_WORDCHAR:
778       set2 = (uint8_t *)(cb->cbits + cbit_word);
779       break;
780 
781       default:
782       return FALSE;
783       }
784 
785     /* Because the bit sets are unaligned bytes, we need to perform byte
786     comparison here. */
787 
788     set_end = set1 + 32;
789     if (invert_bits)
790       {
791       do
792         {
793         if ((*set1++ & ~(*set2++)) != 0) return FALSE;
794         }
795       while (set1 < set_end);
796       }
797     else
798       {
799       do
800         {
801         if ((*set1++ & *set2++) != 0) return FALSE;
802         }
803       while (set1 < set_end);
804       }
805 
806     if (list[1] == 0) return TRUE;
807     /* Might be an empty repeat. */
808     continue;
809     }
810 
811   /* Some property combinations also acceptable. Unicode property opcodes are
812   processed specially; the rest can be handled with a lookup table. */
813 
814   else
815     {
816     uint32_t leftop, rightop;
817 
818     leftop = base_list[0];
819     rightop = list[0];
820 
821 #ifdef SUPPORT_UNICODE
822     accepted = FALSE; /* Always set in non-unicode case. */
823     if (leftop == OP_PROP || leftop == OP_NOTPROP)
824       {
825       if (rightop == OP_EOD)
826         accepted = TRUE;
827       else if (rightop == OP_PROP || rightop == OP_NOTPROP)
828         {
829         int n;
830         const uint8_t *p;
831         BOOL same = leftop == rightop;
832         BOOL lisprop = leftop == OP_PROP;
833         BOOL risprop = rightop == OP_PROP;
834         BOOL bothprop = lisprop && risprop;
835 
836         /* There's a table that specifies how each combination is to be
837         processed:
838           0   Always return FALSE (never auto-possessify)
839           1   Character groups are distinct (possessify if both are OP_PROP)
840           2   Check character categories in the same group (general or particular)
841           3   Return TRUE if the two opcodes are not the same
842           ... see comments below
843         */
844 
845         n = propposstab[base_list[2]][list[2]];
846         switch(n)
847           {
848           case 0: break;
849           case 1: accepted = bothprop; break;
850           case 2: accepted = (base_list[3] == list[3]) != same; break;
851           case 3: accepted = !same; break;
852 
853           case 4:  /* Left general category, right particular category */
854           accepted = risprop && catposstab[base_list[3]][list[3]] == same;
855           break;
856 
857           case 5:  /* Right general category, left particular category */
858           accepted = lisprop && catposstab[list[3]][base_list[3]] == same;
859           break;
860 
861           /* This code is logically tricky. Think hard before fiddling with it.
862           The posspropstab table has four entries per row. Each row relates to
863           one of PCRE's special properties such as ALNUM or SPACE or WORD.
864           Only WORD actually needs all four entries, but using repeats for the
865           others means they can all use the same code below.
866 
867           The first two entries in each row are Unicode general categories, and
868           apply always, because all the characters they include are part of the
869           PCRE character set. The third and fourth entries are a general and a
870           particular category, respectively, that include one or more relevant
871           characters. One or the other is used, depending on whether the check
872           is for a general or a particular category. However, in both cases the
873           category contains more characters than the specials that are defined
874           for the property being tested against. Therefore, it cannot be used
875           in a NOTPROP case.
876 
877           Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po.
878           Underscore is covered by ucp_P or ucp_Po. */
879 
880           case 6:  /* Left alphanum vs right general category */
881           case 7:  /* Left space vs right general category */
882           case 8:  /* Left word vs right general category */
883           p = posspropstab[n-6];
884           accepted = risprop && lisprop ==
885             (list[3] != p[0] &&
886              list[3] != p[1] &&
887             (list[3] != p[2] || !lisprop));
888           break;
889 
890           case 9:   /* Right alphanum vs left general category */
891           case 10:  /* Right space vs left general category */
892           case 11:  /* Right word vs left general category */
893           p = posspropstab[n-9];
894           accepted = lisprop && risprop ==
895             (base_list[3] != p[0] &&
896              base_list[3] != p[1] &&
897             (base_list[3] != p[2] || !risprop));
898           break;
899 
900           case 12:  /* Left alphanum vs right particular category */
901           case 13:  /* Left space vs right particular category */
902           case 14:  /* Left word vs right particular category */
903           p = posspropstab[n-12];
904           accepted = risprop && lisprop ==
905             (catposstab[p[0]][list[3]] &&
906              catposstab[p[1]][list[3]] &&
907             (list[3] != p[3] || !lisprop));
908           break;
909 
910           case 15:  /* Right alphanum vs left particular category */
911           case 16:  /* Right space vs left particular category */
912           case 17:  /* Right word vs left particular category */
913           p = posspropstab[n-15];
914           accepted = lisprop && risprop ==
915             (catposstab[p[0]][base_list[3]] &&
916              catposstab[p[1]][base_list[3]] &&
917             (base_list[3] != p[3] || !risprop));
918           break;
919           }
920         }
921       }
922 
923     else
924 #endif  /* SUPPORT_UNICODE */
925 
926     accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
927            rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
928            autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
929 
930     if (!accepted) return FALSE;
931 
932     if (list[1] == 0) return TRUE;
933     /* Might be an empty repeat. */
934     continue;
935     }
936 
937   /* Control reaches here only if one of the items is a small character list.
938   All characters are checked against the other side. */
939 
940   do
941     {
942     chr = *chr_ptr;
943 
944     switch(list_ptr[0])
945       {
946       case OP_CHAR:
947       ochr_ptr = list_ptr + 2;
948       do
949         {
950         if (chr == *ochr_ptr) return FALSE;
951         ochr_ptr++;
952         }
953       while(*ochr_ptr != NOTACHAR);
954       break;
955 
956       case OP_NOT:
957       ochr_ptr = list_ptr + 2;
958       do
959         {
960         if (chr == *ochr_ptr)
961           break;
962         ochr_ptr++;
963         }
964       while(*ochr_ptr != NOTACHAR);
965       if (*ochr_ptr == NOTACHAR) return FALSE;   /* Not found */
966       break;
967 
968       /* Note that OP_DIGIT etc. are generated only when PCRE2_UCP is *not*
969       set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
970 
971       case OP_DIGIT:
972       if (chr < 256 && (cb->ctypes[chr] & ctype_digit) != 0) return FALSE;
973       break;
974 
975       case OP_NOT_DIGIT:
976       if (chr > 255 || (cb->ctypes[chr] & ctype_digit) == 0) return FALSE;
977       break;
978 
979       case OP_WHITESPACE:
980       if (chr < 256 && (cb->ctypes[chr] & ctype_space) != 0) return FALSE;
981       break;
982 
983       case OP_NOT_WHITESPACE:
984       if (chr > 255 || (cb->ctypes[chr] & ctype_space) == 0) return FALSE;
985       break;
986 
987       case OP_WORDCHAR:
988       if (chr < 255 && (cb->ctypes[chr] & ctype_word) != 0) return FALSE;
989       break;
990 
991       case OP_NOT_WORDCHAR:
992       if (chr > 255 || (cb->ctypes[chr] & ctype_word) == 0) return FALSE;
993       break;
994 
995       case OP_HSPACE:
996       switch(chr)
997         {
998         HSPACE_CASES: return FALSE;
999         default: break;
1000         }
1001       break;
1002 
1003       case OP_NOT_HSPACE:
1004       switch(chr)
1005         {
1006         HSPACE_CASES: break;
1007         default: return FALSE;
1008         }
1009       break;
1010 
1011       case OP_ANYNL:
1012       case OP_VSPACE:
1013       switch(chr)
1014         {
1015         VSPACE_CASES: return FALSE;
1016         default: break;
1017         }
1018       break;
1019 
1020       case OP_NOT_VSPACE:
1021       switch(chr)
1022         {
1023         VSPACE_CASES: break;
1024         default: return FALSE;
1025         }
1026       break;
1027 
1028       case OP_DOLL:
1029       case OP_EODN:
1030       switch (chr)
1031         {
1032         case CHAR_CR:
1033         case CHAR_LF:
1034         case CHAR_VT:
1035         case CHAR_FF:
1036         case CHAR_NEL:
1037 #ifndef EBCDIC
1038         case 0x2028:
1039         case 0x2029:
1040 #endif  /* Not EBCDIC */
1041         return FALSE;
1042         }
1043       break;
1044 
1045       case OP_EOD:    /* Can always possessify before \z */
1046       break;
1047 
1048 #ifdef SUPPORT_UNICODE
1049       case OP_PROP:
1050       case OP_NOTPROP:
1051       if (!check_char_prop(chr, list_ptr[2], list_ptr[3],
1052             list_ptr[0] == OP_NOTPROP))
1053         return FALSE;
1054       break;
1055 #endif
1056 
1057       case OP_NCLASS:
1058       if (chr > 255) return FALSE;
1059       /* Fall through */
1060 
1061       case OP_CLASS:
1062       if (chr > 255) break;
1063       class_bitset = (uint8_t *)
1064         ((list_ptr == list ? code : base_end) - list_ptr[2]);
1065       if ((class_bitset[chr >> 3] & (1u << (chr & 7))) != 0) return FALSE;
1066       break;
1067 
1068 #ifdef SUPPORT_WIDE_CHARS
1069       case OP_XCLASS:
1070       if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) -
1071           list_ptr[2] + LINK_SIZE, utf)) return FALSE;
1072       break;
1073 #endif
1074 
1075       default:
1076       return FALSE;
1077       }
1078 
1079     chr_ptr++;
1080     }
1081   while(*chr_ptr != NOTACHAR);
1082 
1083   /* At least one character must be matched from this opcode. */
1084 
1085   if (list[1] == 0) return TRUE;
1086   }
1087 
1088 /* Control never reaches here. There used to be a fail-safe return FALSE; here,
1089 but some compilers complain about an unreachable statement. */
1090 }
1091 
1092 
1093 
1094 /*************************************************
1095 *    Scan compiled regex for auto-possession     *
1096 *************************************************/
1097 
1098 /* Replaces single character iterations with their possessive alternatives
1099 if appropriate. This function modifies the compiled opcode! Hitting a
1100 non-existent opcode may indicate a bug in PCRE2, but it can also be caused if a
1101 bad UTF string was compiled with PCRE2_NO_UTF_CHECK. The rec_limit catches
1102 overly complicated or large patterns. In these cases, the check just stops,
1103 leaving the remainder of the pattern unpossessified.
1104 
1105 Arguments:
1106   code        points to start of the byte code
1107   cb          compile data block
1108 
1109 Returns:      0 for success
1110               -1 if a non-existant opcode is encountered
1111 */
1112 
1113 int
PRIV(auto_possessify)1114 PRIV(auto_possessify)(PCRE2_UCHAR *code, const compile_block *cb)
1115 {
1116 PCRE2_UCHAR c;
1117 PCRE2_SPTR end;
1118 PCRE2_UCHAR *repeat_opcode;
1119 uint32_t list[8];
1120 int rec_limit = 1000;  /* Was 10,000 but clang+ASAN uses a lot of stack. */
1121 BOOL utf = (cb->external_options & PCRE2_UTF) != 0;
1122 BOOL ucp = (cb->external_options & PCRE2_UCP) != 0;
1123 
1124 for (;;)
1125   {
1126   c = *code;
1127 
1128   if (c >= OP_TABLE_LENGTH) return -1;   /* Something gone wrong */
1129 
1130   if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
1131     {
1132     c -= get_repeat_base(c) - OP_STAR;
1133     end = (c <= OP_MINUPTO) ?
1134       get_chr_property_list(code, utf, ucp, cb->fcc, list) : NULL;
1135     list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
1136 
1137     if (end != NULL && compare_opcodes(end, utf, ucp, cb, list, end,
1138         &rec_limit))
1139       {
1140       switch(c)
1141         {
1142         case OP_STAR:
1143         *code += OP_POSSTAR - OP_STAR;
1144         break;
1145 
1146         case OP_MINSTAR:
1147         *code += OP_POSSTAR - OP_MINSTAR;
1148         break;
1149 
1150         case OP_PLUS:
1151         *code += OP_POSPLUS - OP_PLUS;
1152         break;
1153 
1154         case OP_MINPLUS:
1155         *code += OP_POSPLUS - OP_MINPLUS;
1156         break;
1157 
1158         case OP_QUERY:
1159         *code += OP_POSQUERY - OP_QUERY;
1160         break;
1161 
1162         case OP_MINQUERY:
1163         *code += OP_POSQUERY - OP_MINQUERY;
1164         break;
1165 
1166         case OP_UPTO:
1167         *code += OP_POSUPTO - OP_UPTO;
1168         break;
1169 
1170         case OP_MINUPTO:
1171         *code += OP_POSUPTO - OP_MINUPTO;
1172         break;
1173         }
1174       }
1175     c = *code;
1176     }
1177   else if (c == OP_CLASS || c == OP_NCLASS || c == OP_XCLASS)
1178     {
1179 #ifdef SUPPORT_WIDE_CHARS
1180     if (c == OP_XCLASS)
1181       repeat_opcode = code + GET(code, 1);
1182     else
1183 #endif
1184       repeat_opcode = code + 1 + (32 / sizeof(PCRE2_UCHAR));
1185 
1186     c = *repeat_opcode;
1187     if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
1188       {
1189       /* end must not be NULL. */
1190       end = get_chr_property_list(code, utf, ucp, cb->fcc, list);
1191 
1192       list[1] = (c & 1) == 0;
1193 
1194       if (compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit))
1195         {
1196         switch (c)
1197           {
1198           case OP_CRSTAR:
1199           case OP_CRMINSTAR:
1200           *repeat_opcode = OP_CRPOSSTAR;
1201           break;
1202 
1203           case OP_CRPLUS:
1204           case OP_CRMINPLUS:
1205           *repeat_opcode = OP_CRPOSPLUS;
1206           break;
1207 
1208           case OP_CRQUERY:
1209           case OP_CRMINQUERY:
1210           *repeat_opcode = OP_CRPOSQUERY;
1211           break;
1212 
1213           case OP_CRRANGE:
1214           case OP_CRMINRANGE:
1215           *repeat_opcode = OP_CRPOSRANGE;
1216           break;
1217           }
1218         }
1219       }
1220     c = *code;
1221     }
1222 
1223   switch(c)
1224     {
1225     case OP_END:
1226     return 0;
1227 
1228     case OP_TYPESTAR:
1229     case OP_TYPEMINSTAR:
1230     case OP_TYPEPLUS:
1231     case OP_TYPEMINPLUS:
1232     case OP_TYPEQUERY:
1233     case OP_TYPEMINQUERY:
1234     case OP_TYPEPOSSTAR:
1235     case OP_TYPEPOSPLUS:
1236     case OP_TYPEPOSQUERY:
1237     if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
1238     break;
1239 
1240     case OP_TYPEUPTO:
1241     case OP_TYPEMINUPTO:
1242     case OP_TYPEEXACT:
1243     case OP_TYPEPOSUPTO:
1244     if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
1245       code += 2;
1246     break;
1247 
1248     case OP_CALLOUT_STR:
1249     code += GET(code, 1 + 2*LINK_SIZE);
1250     break;
1251 
1252 #ifdef SUPPORT_WIDE_CHARS
1253     case OP_XCLASS:
1254     code += GET(code, 1);
1255     break;
1256 #endif
1257 
1258     case OP_MARK:
1259     case OP_COMMIT_ARG:
1260     case OP_PRUNE_ARG:
1261     case OP_SKIP_ARG:
1262     case OP_THEN_ARG:
1263     code += code[1];
1264     break;
1265     }
1266 
1267   /* Add in the fixed length from the table */
1268 
1269   code += PRIV(OP_lengths)[c];
1270 
1271   /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
1272   followed by a multi-byte character. The length in the table is a minimum, so
1273   we have to arrange to skip the extra code units. */
1274 
1275 #ifdef MAYBE_UTF_MULTI
1276   if (utf) switch(c)
1277     {
1278     case OP_CHAR:
1279     case OP_CHARI:
1280     case OP_NOT:
1281     case OP_NOTI:
1282     case OP_STAR:
1283     case OP_MINSTAR:
1284     case OP_PLUS:
1285     case OP_MINPLUS:
1286     case OP_QUERY:
1287     case OP_MINQUERY:
1288     case OP_UPTO:
1289     case OP_MINUPTO:
1290     case OP_EXACT:
1291     case OP_POSSTAR:
1292     case OP_POSPLUS:
1293     case OP_POSQUERY:
1294     case OP_POSUPTO:
1295     case OP_STARI:
1296     case OP_MINSTARI:
1297     case OP_PLUSI:
1298     case OP_MINPLUSI:
1299     case OP_QUERYI:
1300     case OP_MINQUERYI:
1301     case OP_UPTOI:
1302     case OP_MINUPTOI:
1303     case OP_EXACTI:
1304     case OP_POSSTARI:
1305     case OP_POSPLUSI:
1306     case OP_POSQUERYI:
1307     case OP_POSUPTOI:
1308     case OP_NOTSTAR:
1309     case OP_NOTMINSTAR:
1310     case OP_NOTPLUS:
1311     case OP_NOTMINPLUS:
1312     case OP_NOTQUERY:
1313     case OP_NOTMINQUERY:
1314     case OP_NOTUPTO:
1315     case OP_NOTMINUPTO:
1316     case OP_NOTEXACT:
1317     case OP_NOTPOSSTAR:
1318     case OP_NOTPOSPLUS:
1319     case OP_NOTPOSQUERY:
1320     case OP_NOTPOSUPTO:
1321     case OP_NOTSTARI:
1322     case OP_NOTMINSTARI:
1323     case OP_NOTPLUSI:
1324     case OP_NOTMINPLUSI:
1325     case OP_NOTQUERYI:
1326     case OP_NOTMINQUERYI:
1327     case OP_NOTUPTOI:
1328     case OP_NOTMINUPTOI:
1329     case OP_NOTEXACTI:
1330     case OP_NOTPOSSTARI:
1331     case OP_NOTPOSPLUSI:
1332     case OP_NOTPOSQUERYI:
1333     case OP_NOTPOSUPTOI:
1334     if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
1335     break;
1336     }
1337 #else
1338   (void)(utf);  /* Keep compiler happy by referencing function argument */
1339 #endif  /* SUPPORT_WIDE_CHARS */
1340   }
1341 }
1342 
1343 /* End of pcre2_auto_possess.c */
1344