• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10           New API code Copyright (c) 2015-2019 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45 
46 /* These defines enable debugging code */
47 
48 /* #define DEBUG_FRAMES_DISPLAY */
49 /* #define DEBUG_SHOW_OPS */
50 /* #define DEBUG_SHOW_RMATCH */
51 
/* <stdarg.h> is needed only by the variadic debugging function that is
compiled when DEBUG_FRAMES_DISPLAY is defined, so the guard here must test that
same macro name. */

#ifdef DEBUG_FRAMES_DISPLAY
#include <stdarg.h>
#endif
55 
56 /* These defines identify the name of the block containing "static"
57 information, and fields within it. */
58 
59 #define NLBLOCK mb              /* Block containing newline information */
60 #define PSSTART start_subject   /* Field containing processed string start */
61 #define PSEND   end_subject     /* Field containing processed string end */
62 
63 #include "pcre2_internal.h"
64 
65 #define RECURSE_UNSET 0xffffffffu  /* Bigger than max group number */
66 
67 /* Masks for identifying the public options that are permitted at match time. */
68 
69 #define PUBLIC_MATCH_OPTIONS \
70   (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
71    PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
72    PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT|PCRE2_COPY_MATCHED_SUBJECT)
73 
74 #define PUBLIC_JIT_MATCH_OPTIONS \
75    (PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\
76     PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD|\
77     PCRE2_COPY_MATCHED_SUBJECT)
78 
79 /* Non-error returns from and within the match() function. Error returns are
80 externally defined PCRE2_ERROR_xxx codes, which are all negative. */
81 
82 #define MATCH_MATCH        1
83 #define MATCH_NOMATCH      0
84 
85 /* Special internal returns used in the match() function. Make them
86 sufficiently negative to avoid the external error codes. */
87 
88 #define MATCH_ACCEPT       (-999)
89 #define MATCH_KETRPOS      (-998)
90 /* The next 5 must be kept together and in sequence so that a test that checks
91 for any one of them can use a range. */
92 #define MATCH_COMMIT       (-997)
93 #define MATCH_PRUNE        (-996)
94 #define MATCH_SKIP         (-995)
95 #define MATCH_SKIP_ARG     (-994)
96 #define MATCH_THEN         (-993)
97 #define MATCH_BACKTRACK_MAX MATCH_THEN
98 #define MATCH_BACKTRACK_MIN MATCH_COMMIT
99 
100 /* Group frame type values. Zero means the frame is not a group frame. The
101 lower 16 bits are used for data (e.g. the capture number). Group frames are
102 used for most groups so that information about the start is easily available at
103 the end without having to scan back through intermediate frames (backtrack
104 points). */
105 
106 #define GF_CAPTURE     0x00010000u
107 #define GF_NOCAPTURE   0x00020000u
108 #define GF_CONDASSERT  0x00030000u
109 #define GF_RECURSE     0x00040000u
110 
111 /* Masks for the identity and data parts of the group frame type. */
112 
113 #define GF_IDMASK(a)   ((a) & 0xffff0000u)
114 #define GF_DATAMASK(a) ((a) & 0x0000ffffu)
115 
116 /* Repetition types */
117 
enum {
  REPTYPE_MIN,
  REPTYPE_MAX,
  REPTYPE_POS
};

/* Minimum and maximum counts for the common repeats, one entry per line. In
rep_max, UINT32_MAX stands for "no upper limit". The two range entries in each
of these tables are only placeholders. */

static const uint32_t rep_min[] = {
  0,    /* "*" */
  0,    /* "*?" */
  1,    /* "+" */
  1,    /* "+?" */
  0,    /* "?" */
  0,    /* "??" */
  0,    /* placeholder for OP_CRRANGE */
  0,    /* placeholder for OP_CRMINRANGE */
  0,    /* OP_CRPOSSTAR */
  1,    /* OP_CRPOSPLUS */
  0     /* OP_CRPOSQUERY */
};

static const uint32_t rep_max[] = {
  UINT32_MAX,    /* "*" */
  UINT32_MAX,    /* "*?" */
  UINT32_MAX,    /* "+" */
  UINT32_MAX,    /* "+?" */
  1,             /* "?" */
  1,             /* "??" */
  0,             /* placeholder for OP_CRRANGE */
  0,             /* placeholder for OP_CRMINRANGE */
  UINT32_MAX,    /* OP_CRPOSSTAR */
  UINT32_MAX,    /* OP_CRPOSPLUS */
  1              /* OP_CRPOSQUERY */
};

/* Repetition type for each repeat. Unlike the two tables above, this one has
a final entry for OP_CRPOSRANGE. */

static const uint32_t rep_typ[] = {
  REPTYPE_MAX,    /* "*" */
  REPTYPE_MIN,    /* "*?" */
  REPTYPE_MAX,    /* "+" */
  REPTYPE_MIN,    /* "+?" */
  REPTYPE_MAX,    /* "?" */
  REPTYPE_MIN,    /* "??" */
  REPTYPE_MAX,    /* OP_CRRANGE */
  REPTYPE_MIN,    /* OP_CRMINRANGE */
  REPTYPE_POS,    /* OP_CRPOSSTAR */
  REPTYPE_POS,    /* OP_CRPOSPLUS */
  REPTYPE_POS,    /* OP_CRPOSQUERY */
  REPTYPE_POS     /* OP_CRPOSRANGE */
};
146 
147 /* Numbers for RMATCH calls at backtracking points. When these lists are
148 changed, the code at RETURN_SWITCH below must be updated in sync.  */
149 
150 enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
151        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
152        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
153        RM31,  RM32, RM33, RM34, RM35, RM36 };
154 
155 #ifdef SUPPORT_WIDE_CHARS
156 enum { RM100=100, RM101 };
157 #endif
158 
159 #ifdef SUPPORT_UNICODE
160 enum { RM200=200, RM201, RM202, RM203, RM204, RM205, RM206, RM207,
161        RM208,     RM209, RM210, RM211, RM212, RM213, RM214, RM215,
162        RM216,     RM217, RM218, RM219, RM220, RM221, RM222 };
163 #endif
164 
165 /* Define short names for general fields in the current backtrack frame, which
166 is always pointed to by the F variable. Occasional references to fields in
167 other frames are written out explicitly. There are also some fields in the
168 current frame whose names start with "temp" that are used for short-term,
169 localised backtracking memory. These are #defined with Lxxx names at the point
170 of use and undefined afterwards. */
171 
172 #define Fback_frame        F->back_frame
173 #define Fcapture_last      F->capture_last
174 #define Fcurrent_recurse   F->current_recurse
175 #define Fecode             F->ecode
176 #define Feptr              F->eptr
177 #define Fgroup_frame_type  F->group_frame_type
178 #define Flast_group_offset F->last_group_offset
179 #define Flength            F->length
180 #define Fmark              F->mark
181 #define Frdepth            F->rdepth
182 #define Fstart_match       F->start_match
183 #define Foffset_top        F->offset_top
184 #define Foccu              F->occu
185 #define Fop                F->op
186 #define Fovector           F->ovector
187 #define Freturn_id         F->return_id
188 
189 
190 #ifdef DEBUG_FRAMES_DISPLAY
191 /*************************************************
192 *      Display current frames and contents       *
193 *************************************************/
194 
195 /* This debugging function displays the current set of frames and their
196 contents. It is not called automatically from anywhere, the intention being
197 that calls can be inserted where necessary when debugging frame-related
198 problems.
199 
200 Arguments:
201   f           the file to write to
202   F           the current top frame
203   P           a previous frame of interest
204   frame_size  the frame size
205   mb          points to the match block
206   s           identification text
207 
208 Returns:    nothing
209 */
210 
211 static void
display_frames(FILE * f,heapframe * F,heapframe * P,PCRE2_SIZE frame_size,match_block * mb,const char * s,...)212 display_frames(FILE *f, heapframe *F, heapframe *P, PCRE2_SIZE frame_size,
213   match_block *mb, const char *s, ...)
214 {
215 uint32_t i;
216 heapframe *Q;
217 va_list ap;
218 va_start(ap, s);
219 
220 fprintf(f, "FRAMES ");
221 vfprintf(f, s, ap);
222 va_end(ap);
223 
224 if (P != NULL) fprintf(f, " P=%lu",
225   ((char *)P - (char *)(mb->match_frames))/frame_size);
226 fprintf(f, "\n");
227 
228 for (i = 0, Q = mb->match_frames;
229      Q <= F;
230      i++, Q = (heapframe *)((char *)Q + frame_size))
231   {
232   fprintf(f, "Frame %d type=%x subj=%lu code=%d back=%lu id=%d",
233     i, Q->group_frame_type, Q->eptr - mb->start_subject, *(Q->ecode),
234     Q->back_frame, Q->return_id);
235 
236   if (Q->last_group_offset == PCRE2_UNSET)
237     fprintf(f, " lgoffset=unset\n");
238   else
239     fprintf(f, " lgoffset=%lu\n",  Q->last_group_offset/frame_size);
240   }
241 }
242 
243 #endif
244 
245 
246 
247 /*************************************************
248 *                Process a callout               *
249 *************************************************/
250 
251 /* This function is called for all callouts, whether "standalone" or at the
252 start of a conditional group. Feptr will be pointing to either OP_CALLOUT or
253 OP_CALLOUT_STR. A callout block is allocated in pcre2_match() and initialized
254 with fixed values.
255 
256 Arguments:
257   F          points to the current backtracking frame
258   mb         points to the match block
259   lengthptr  where to return the length of the callout item
260 
261 Returns:     the return from the callout
262              or 0 if no callout function exists
263 */
264 
265 static int
do_callout(heapframe * F,match_block * mb,PCRE2_SIZE * lengthptr)266 do_callout(heapframe *F, match_block *mb, PCRE2_SIZE *lengthptr)
267 {
268 int rc;
269 PCRE2_SIZE save0, save1;
270 PCRE2_SIZE *callout_ovector;
271 pcre2_callout_block *cb;
272 
273 *lengthptr = (*Fecode == OP_CALLOUT)?
274   PRIV(OP_lengths)[OP_CALLOUT] : GET(Fecode, 1 + 2*LINK_SIZE);
275 
276 if (mb->callout == NULL) return 0;   /* No callout function provided */
277 
278 /* The original matching code (pre 10.30) worked directly with the ovector
279 passed by the user, and this was passed to callouts. Now that the working
280 ovector is in the backtracking frame, it no longer needs to reserve space for
281 the overall match offsets (which would waste space in the frame). For backward
282 compatibility, however, we pass capture_top and offset_vector to the callout as
283 if for the extended ovector, and we ensure that the first two slots are unset
284 by preserving and restoring their current contents. Picky compilers complain if
285 references such as Fovector[-2] are use directly, so we set up a separate
286 pointer. */
287 
288 callout_ovector = (PCRE2_SIZE *)(Fovector) - 2;
289 
290 /* The cb->version, cb->subject, cb->subject_length, and cb->start_match fields
291 are set externally. The first 3 never change; the last is updated for each
292 bumpalong. */
293 
294 cb = mb->cb;
295 cb->capture_top      = (uint32_t)Foffset_top/2 + 1;
296 cb->capture_last     = Fcapture_last;
297 cb->offset_vector    = callout_ovector;
298 cb->mark             = mb->nomatch_mark;
299 cb->current_position = (PCRE2_SIZE)(Feptr - mb->start_subject);
300 cb->pattern_position = GET(Fecode, 1);
301 cb->next_item_length = GET(Fecode, 1 + LINK_SIZE);
302 
303 if (*Fecode == OP_CALLOUT)  /* Numerical callout */
304   {
305   cb->callout_number = Fecode[1 + 2*LINK_SIZE];
306   cb->callout_string_offset = 0;
307   cb->callout_string = NULL;
308   cb->callout_string_length = 0;
309   }
310 else  /* String callout */
311   {
312   cb->callout_number = 0;
313   cb->callout_string_offset = GET(Fecode, 1 + 3*LINK_SIZE);
314   cb->callout_string = Fecode + (1 + 4*LINK_SIZE) + 1;
315   cb->callout_string_length =
316     *lengthptr - (1 + 4*LINK_SIZE) - 2;
317   }
318 
319 save0 = callout_ovector[0];
320 save1 = callout_ovector[1];
321 callout_ovector[0] = callout_ovector[1] = PCRE2_UNSET;
322 rc = mb->callout(cb, mb->callout_data);
323 callout_ovector[0] = save0;
324 callout_ovector[1] = save1;
325 cb->callout_flags = 0;
326 return rc;
327 }
328 
329 
330 
331 /*************************************************
332 *          Match a back-reference                *
333 *************************************************/
334 
335 /* This function is called only when it is known that the offset lies within
336 the offsets that have so far been used in the match. Note that in caseless
337 UTF-8 mode, the number of subject bytes matched may be different to the number
338 of reference bytes. (In theory this could also happen in UTF-16 mode, but it
339 seems unlikely.)
340 
341 Arguments:
342   offset      index into the offset vector
343   caseless    TRUE if caseless
344   F           the current backtracking frame pointer
345   mb          points to match block
346   lengthptr   pointer for returning the length matched
347 
348 Returns:      = 0 sucessful match; number of code units matched is set
349               < 0 no match
350               > 0 partial match
351 */
352 
353 static int
match_ref(PCRE2_SIZE offset,BOOL caseless,heapframe * F,match_block * mb,PCRE2_SIZE * lengthptr)354 match_ref(PCRE2_SIZE offset, BOOL caseless, heapframe *F, match_block *mb,
355   PCRE2_SIZE *lengthptr)
356 {
357 PCRE2_SPTR p;
358 PCRE2_SIZE length;
359 PCRE2_SPTR eptr;
360 PCRE2_SPTR eptr_start;
361 
362 /* Deal with an unset group. The default is no match, but there is an option to
363 match an empty string. */
364 
365 if (offset >= Foffset_top || Fovector[offset] == PCRE2_UNSET)
366   {
367   if ((mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
368     {
369     *lengthptr = 0;
370     return 0;      /* Match */
371     }
372   else return -1;  /* No match */
373   }
374 
375 /* Separate the caseless and UTF cases for speed. */
376 
377 eptr = eptr_start = Feptr;
378 p = mb->start_subject + Fovector[offset];
379 length = Fovector[offset+1] - Fovector[offset];
380 
381 if (caseless)
382   {
383 #if defined SUPPORT_UNICODE
384   if ((mb->poptions & PCRE2_UTF) != 0)
385     {
386     /* Match characters up to the end of the reference. NOTE: the number of
387     code units matched may differ, because in UTF-8 there are some characters
388     whose upper and lower case codes have different numbers of bytes. For
389     example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65 (3
390     bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
391     sequence of two of the latter. It is important, therefore, to check the
392     length along the reference, not along the subject (earlier code did this
393     wrong). */
394 
395     PCRE2_SPTR endptr = p + length;
396     while (p < endptr)
397       {
398       uint32_t c, d;
399       const ucd_record *ur;
400       if (eptr >= mb->end_subject) return 1;   /* Partial match */
401       GETCHARINC(c, eptr);
402       GETCHARINC(d, p);
403       ur = GET_UCD(d);
404       if (c != d && c != (uint32_t)((int)d + ur->other_case))
405         {
406         const uint32_t *pp = PRIV(ucd_caseless_sets) + ur->caseset;
407         for (;;)
408           {
409           if (c < *pp) return -1;  /* No match */
410           if (c == *pp++) break;
411           }
412         }
413       }
414     }
415   else
416 #endif
417 
418     /* Not in UTF mode */
419 
420     {
421     for (; length > 0; length--)
422       {
423       uint32_t cc, cp;
424       if (eptr >= mb->end_subject) return 1;   /* Partial match */
425       cc = UCHAR21TEST(eptr);
426       cp = UCHAR21TEST(p);
427       if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc))
428         return -1;  /* No match */
429       p++;
430       eptr++;
431       }
432     }
433   }
434 
435 /* In the caseful case, we can just compare the code units, whether or not we
436 are in UTF mode. When partial matching, we have to do this unit-by-unit. */
437 
438 else
439   {
440   if (mb->partial != 0)
441     {
442     for (; length > 0; length--)
443       {
444       if (eptr >= mb->end_subject) return 1;   /* Partial match */
445       if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;  /* No match */
446       }
447     }
448 
449   /* Not partial matching */
450 
451   else
452     {
453     if ((PCRE2_SIZE)(mb->end_subject - eptr) < length) return 1; /* Partial */
454     if (memcmp(p, eptr, CU2BYTES(length)) != 0) return -1;  /* No match */
455     eptr += length;
456     }
457   }
458 
459 *lengthptr = eptr - eptr_start;
460 return 0;  /* Match */
461 }
462 
463 
464 
465 /******************************************************************************
466 *******************************************************************************
467                    "Recursion" in the match() function
468 
469 The original match() function was highly recursive, but this proved to be the
470 source of a number of problems over the years, mostly because of the relatively
471 small system stacks that are commonly found. As new features were added to
472 patterns, various kludges were invented to reduce the amount of stack used,
473 making the code hard to understand in places.
474 
475 A version did exist that used individual frames on the heap instead of calling
476 match() recursively, but this ran substantially slower. The current version is
477 a refactoring that uses a vector of frames to remember backtracking points.
478 This runs no slower, and possibly even a bit faster than the original recursive
479 implementation. An initial vector of size START_FRAMES_SIZE (enough for maybe
480 50 frames) is allocated on the system stack. If this is not big enough, the
481 heap is used for a larger vector.
482 
483 *******************************************************************************
484 ******************************************************************************/
485 
486 
487 
488 
489 /*************************************************
490 *       Macros for the match() function          *
491 *************************************************/
492 
493 /* These macros pack up tests that are used for partial matching several times
494 in the code. We set the "hit end" flag if the pointer is at the end of the
495 subject and also past the earliest inspected character (i.e. something has been
496 matched, even if not part of the actual matched string). For hard partial
497 matching, we then return immediately. The second one is used when we already
498 know we are past the end of the subject. */
499 
/* CHECK_PARTIAL() expands to a bare "if" statement, not an expression, so it
must be used as a statement of its own. It needs "mb" and "F" (through the
Feptr alias) to be in scope, and it can execute "return PCRE2_ERROR_PARTIAL" in
the enclosing function when mb->partial is greater than 1. */

500 #define CHECK_PARTIAL()\
501   if (mb->partial != 0 && Feptr >= mb->end_subject && \
502       Feptr > mb->start_used_ptr) \
503     { \
504     mb->hitend = TRUE; \
505     if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
506     }
507 

/* SCHECK_PARTIAL() is the same as CHECK_PARTIAL() except that it omits the
"Feptr >= mb->end_subject" test. */

508 #define SCHECK_PARTIAL()\
509   if (mb->partial != 0 && Feptr > mb->start_used_ptr) \
510     { \
511     mb->hitend = TRUE; \
512     if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
513     }
514 
515 /* These macros are used to implement backtracking. They simulate a recursive
516 call to the match() function by means of a local vector of frames which
517 remember the backtracking points. */
518 
/* RMATCH(ra,rb): "ra" is assigned to start_ecode, and "rb" is stored in the
current frame's return_id field before jumping to MATCH_RECURSE. "rb" is also
pasted into the label name L_<rb>, which is placed immediately after the jump.
Because an operand of ## is not macro-expanded, "rb" has to be written as the
identifier itself (for example RM1). Each expansion defines its label, so any
given "rb" can be used in only one RMATCH within a function. Presumably the
code at RETURN_SWITCH uses the stored return_id to jump back to this label;
that code is not in this part of the file. */

519 #define RMATCH(ra,rb)\
520   {\
521   start_ecode = ra;\
522   Freturn_id = rb;\
523   goto MATCH_RECURSE;\
524   L_##rb:;\
525   }
526 

/* RRETURN(ra): store "ra" in rrc and jump to the RETURN_SWITCH label. */

527 #define RRETURN(ra)\
528   {\
529   rrc = ra;\
530   goto RETURN_SWITCH;\
531   }
532 
533 
534 
535 /*************************************************
536 *         Match from current position            *
537 *************************************************/
538 
539 /* This function is called to run one match attempt at a single starting point
540 in the subject.
541 
542 Performance note: It might be tempting to extract commonly used fields from the
543 mb structure (e.g. end_subject) into individual variables to improve
544 performance. Tests using gcc on a SPARC disproved this; in the first case, it
545 made performance worse.
546 
547 Arguments:
548    start_eptr   starting character in subject
549    start_ecode  starting position in compiled code
550    ovector      pointer to the final output vector
551    oveccount    number of pairs in ovector
552    top_bracket  number of capturing parentheses in the pattern
553    frame_size   size of each backtracking frame
554    mb           pointer to "static" variables block
555 
556 Returns:        MATCH_MATCH if matched            )  these values are >= 0
557                 MATCH_NOMATCH if failed to match  )
558                 negative MATCH_xxx value for PRUNE, SKIP, etc
559                 negative PCRE2_ERROR_xxx value if aborted by an error condition
560                 (e.g. stopped by repeated call or depth limit)
561 */
562 
563 static int
match(PCRE2_SPTR start_eptr,PCRE2_SPTR start_ecode,PCRE2_SIZE * ovector,uint16_t oveccount,uint16_t top_bracket,PCRE2_SIZE frame_size,match_block * mb)564 match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, PCRE2_SIZE *ovector,
565   uint16_t oveccount, uint16_t top_bracket, PCRE2_SIZE frame_size,
566   match_block *mb)
567 {
568 /* Frame-handling variables */
569 
570 heapframe *F;           /* Current frame pointer */
571 heapframe *N = NULL;    /* Temporary frame pointers */
572 heapframe *P = NULL;
573 heapframe *assert_accept_frame;  /* For passing back the frame with captures */
574 PCRE2_SIZE frame_copy_size;      /* Amount to copy when creating a new frame */
575 
576 /* Local variables that do not need to be preserved over calls to RRMATCH(). */
577 
578 PCRE2_SPTR bracode;     /* Temp pointer to start of group */
579 PCRE2_SIZE offset;      /* Used for group offsets */
580 PCRE2_SIZE length;      /* Used for various length calculations */
581 
582 int rrc;                /* Return from functions & backtracking "recursions" */
583 #ifdef SUPPORT_UNICODE
584 int proptype;           /* Type of character property */
585 #endif
586 
587 uint32_t i;             /* Used for local loops */
588 uint32_t fc;            /* Character values */
589 uint32_t number;        /* Used for group and other numbers */
590 uint32_t reptype = 0;   /* Type of repetition (0 to avoid compiler warning) */
591 uint32_t group_frame_type;  /* Specifies type for new group frames */
592 
593 BOOL condition;         /* Used in conditional groups */
594 BOOL cur_is_word;       /* Used in "word" tests */
595 BOOL prev_is_word;      /* Used in "word" tests */
596 
597 /* UTF flag */
598 
599 #ifdef SUPPORT_UNICODE
600 BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
601 #else
602 BOOL utf = FALSE;
603 #endif
604 
605 /* This is the length of the last part of a backtracking frame that must be
606 copied when a new frame is created. */
607 
608 frame_copy_size = frame_size - offsetof(heapframe, eptr);
609 
610 /* Set up the first current frame at the start of the vector, and initialize
611 fields that are not reset for new frames. */
612 
613 F = mb->match_frames;
614 Frdepth = 0;                        /* "Recursion" depth */
615 Fcapture_last = 0;                  /* Number of most recent capture */
616 Fcurrent_recurse = RECURSE_UNSET;   /* Not pattern recursing. */
617 Fstart_match = Feptr = start_eptr;  /* Current data pointer and start match */
618 Fmark = NULL;                       /* Most recent mark */
619 Foffset_top = 0;                    /* End of captures within the frame */
620 Flast_group_offset = PCRE2_UNSET;   /* Saved frame of most recent group */
621 group_frame_type = 0;               /* Not a start of group frame */
622 goto NEW_FRAME;                     /* Start processing with this frame */
623 
624 /* Come back here when we want to create a new frame for remembering a
625 backtracking point. */
626 
627 MATCH_RECURSE:
628 
629 /* Set up a new backtracking frame. If the vector is full, get a new one
630 on the heap, doubling the size, but constrained by the heap limit. */
631 
632 N = (heapframe *)((char *)F + frame_size);
633 if (N >= mb->match_frames_top)
634   {
635   PCRE2_SIZE newsize = mb->frame_vector_size * 2;
636   heapframe *new;
637 
638   if ((newsize / 1024) > mb->heap_limit)
639     {
640     PCRE2_SIZE maxsize = ((mb->heap_limit * 1024)/frame_size) * frame_size;
641     if (mb->frame_vector_size >= maxsize) return PCRE2_ERROR_HEAPLIMIT;
642     newsize = maxsize;
643     }
644 
645   new = mb->memctl.malloc(newsize, mb->memctl.memory_data);
646   if (new == NULL) return PCRE2_ERROR_NOMEMORY;
647   memcpy(new, mb->match_frames, mb->frame_vector_size);
648 
649   F = (heapframe *)((char *)new + ((char *)F - (char *)mb->match_frames));
650   N = (heapframe *)((char *)F + frame_size);
651 
652   if (mb->match_frames != mb->stack_frames)
653     mb->memctl.free(mb->match_frames, mb->memctl.memory_data);
654   mb->match_frames = new;
655   mb->match_frames_top = (heapframe *)((char *)mb->match_frames + newsize);
656   mb->frame_vector_size = newsize;
657   }
658 
659 #ifdef DEBUG_SHOW_RMATCH
660 fprintf(stderr, "++ RMATCH %2d frame=%d", Freturn_id, Frdepth + 1);
661 if (group_frame_type != 0)
662   {
663   fprintf(stderr, " type=%x ", group_frame_type);
664   switch (GF_IDMASK(group_frame_type))
665     {
666     case GF_CAPTURE:
667     fprintf(stderr, "capture=%d", GF_DATAMASK(group_frame_type));
668     break;
669 
670     case GF_NOCAPTURE:
671     fprintf(stderr, "nocapture op=%d", GF_DATAMASK(group_frame_type));
672     break;
673 
674     case GF_CONDASSERT:
675     fprintf(stderr, "condassert op=%d", GF_DATAMASK(group_frame_type));
676     break;
677 
678     case GF_RECURSE:
679     fprintf(stderr, "recurse=%d", GF_DATAMASK(group_frame_type));
680     break;
681 
682     default:
683     fprintf(stderr, "*** unknown ***");
684     break;
685     }
686   }
687 fprintf(stderr, "\n");
688 #endif
689 
690 /* Copy those fields that must be copied into the new frame, increase the
691 "recursion" depth (i.e. the new frame's index) and then make the new frame
692 current. */
693 
694 memcpy((char *)N + offsetof(heapframe, eptr),
695        (char *)F + offsetof(heapframe, eptr),
696        frame_copy_size);
697 
698 N->rdepth = Frdepth + 1;
699 F = N;
700 
701 /* Carry on processing with a new frame. */
702 
703 NEW_FRAME:
704 Fgroup_frame_type = group_frame_type;
705 Fecode = start_ecode;      /* Starting code pointer */
706 Fback_frame = frame_size;  /* Default is go back one frame */
707 
708 /* If this is a special type of group frame, remember its offset for quick
709 access at the end of the group. If this is a recursion, set a new current
710 recursion value. */
711 
712 if (group_frame_type != 0)
713   {
714   Flast_group_offset = (char *)F - (char *)mb->match_frames;
715   if (GF_IDMASK(group_frame_type) == GF_RECURSE)
716     Fcurrent_recurse = GF_DATAMASK(group_frame_type);
717   group_frame_type = 0;
718   }
719 
720 
721 /* ========================================================================= */
722 /* This is the main processing loop. First check that we haven't recorded too
723 many backtracks (search tree is too large), or that we haven't exceeded the
724 recursive depth limit (used too many backtracking frames). If not, process the
725 opcodes. */
726 
727 if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT;
728 if (Frdepth >= mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT;
729 
730 for (;;)
731   {
732 #ifdef DEBUG_SHOW_OPS
733 fprintf(stderr, "++ op=%d\n", *Fecode);
734 #endif
735 
736   Fop = (uint8_t)(*Fecode);  /* Cast needed for 16-bit and 32-bit modes */
737   switch(Fop)
738     {
739     /* ===================================================================== */
740     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes, to close
741     any currently open capturing brackets. Unlike reaching the end of a group,
742     where we know the starting frame is at the top of the chained frames, in
743     this case we have to search back for the relevant frame in case other types
744     of group that use chained frames have intervened. Multiple OP_CLOSEs always
745     come innermost first, which matches the chain order. We can ignore this in
746     a recursion, because captures are not passed out of recursions. */
747 
748     case OP_CLOSE:
749     if (Fcurrent_recurse == RECURSE_UNSET)
750       {
751       number = GET2(Fecode, 1);
752       offset = Flast_group_offset;
753       for(;;)
754         {
755         if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
756         N = (heapframe *)((char *)mb->match_frames + offset);
757         P = (heapframe *)((char *)N - frame_size);
758         if (N->group_frame_type == (GF_CAPTURE | number)) break;
759         offset = P->last_group_offset;
760         }
761       offset = (number << 1) - 2;
762       Fcapture_last = number;
763       Fovector[offset] = P->eptr - mb->start_subject;
764       Fovector[offset+1] = Feptr - mb->start_subject;
765       if (offset >= Foffset_top) Foffset_top = offset + 2;
766       }
767     Fecode += PRIV(OP_lengths)[*Fecode];
768     break;
769 
770 
771     /* ===================================================================== */
772     /* Real or forced end of the pattern, assertion, or recursion. In an
773     assertion ACCEPT, update the last used pointer and remember the current
774     frame so that the captures and mark can be fished out of it. */
775 
776     case OP_ASSERT_ACCEPT:
777     if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
778     assert_accept_frame = F;
779     RRETURN(MATCH_ACCEPT);
780 
781     /* If recursing, we have to find the most recent recursion. */
782 
783     case OP_ACCEPT:
784     case OP_END:
785 
786     /* Handle end of a recursion. */
787 
788     if (Fcurrent_recurse != RECURSE_UNSET)
789       {
790       offset = Flast_group_offset;
791       for(;;)
792         {
793         if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
794         N = (heapframe *)((char *)mb->match_frames + offset);
795         P = (heapframe *)((char *)N - frame_size);
796         if (GF_IDMASK(N->group_frame_type) == GF_RECURSE) break;
797         offset = P->last_group_offset;
798         }
799 
800       /* N is now the frame of the recursion; the previous frame is at the
801       OP_RECURSE position. Go back there, copying the current subject position
802       and mark, and move on past the OP_RECURSE. */
803 
804       P->eptr = Feptr;
805       P->mark = Fmark;
806       F = P;
807       Fecode += 1 + LINK_SIZE;
808       continue;
809       }
810 
811     /* Not a recursion. Fail for an empty string match if either PCRE2_NOTEMPTY
812     is set, or if PCRE2_NOTEMPTY_ATSTART is set and we have matched at the
813     start of the subject. In both cases, backtracking will then try other
814     alternatives, if any. */
815 
816     if (Feptr == Fstart_match &&
817          ((mb->moptions & PCRE2_NOTEMPTY) != 0 ||
818            ((mb->moptions & PCRE2_NOTEMPTY_ATSTART) != 0 &&
819              Fstart_match == mb->start_subject + mb->start_offset)))
820       RRETURN(MATCH_NOMATCH);
821 
822     /* Also fail if PCRE2_ENDANCHORED is set and the end of the match is not
823     the end of the subject. After (*ACCEPT) we fail the entire match (at this
824     position) but backtrack on reaching the end of the pattern. */
825 
826     if (Feptr < mb->end_subject &&
827         ((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0)
828       {
829       if (Fop == OP_END) RRETURN(MATCH_NOMATCH);
830       return MATCH_NOMATCH;
831       }
832 
833     /* We have a successful match of the whole pattern. Record the result and
834     then do a direct return from the function. If there is space in the offset
835     vector, set any pairs that follow the highest-numbered captured string but
836     are less than the number of capturing groups in the pattern to PCRE2_UNSET.
837     It is documented that this happens. "Gaps" are set to PCRE2_UNSET
838     dynamically. It is only those at the end that need setting here. */
839 
840     mb->end_match_ptr = Feptr;           /* Record where we ended */
841     mb->end_offset_top = Foffset_top;    /* and how many extracts were taken */
842     mb->mark = Fmark;                    /* and the last success mark */
843     if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
844 
845     ovector[0] = Fstart_match - mb->start_subject;
846     ovector[1] = Feptr - mb->start_subject;
847 
848     /* Set i to the smaller of the sizes of the external and frame ovectors. */
849 
850     i = 2 * ((top_bracket + 1 > oveccount)? oveccount : top_bracket + 1);
851     memcpy(ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
852     while (--i >= Foffset_top + 2) ovector[i] = PCRE2_UNSET;
853     return MATCH_MATCH;  /* Note: NOT RRETURN */
854 
855 
856     /*===================================================================== */
857     /* Match any single character type except newline; have to take care with
858     CRLF newlines and partial matching. */
859 
860     case OP_ANY:
861     if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
862     if (mb->partial != 0 &&
863         Feptr == mb->end_subject - 1 &&
864         NLBLOCK->nltype == NLTYPE_FIXED &&
865         NLBLOCK->nllen == 2 &&
866         UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
867       {
868       mb->hitend = TRUE;
869       if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
870       }
871     /* Fall through */
872 
873     /* Match any single character whatsoever. */
874 
875     case OP_ALLANY:
876     if (Feptr >= mb->end_subject)  /* DO NOT merge the Feptr++ here; it must */
877       {                            /* not be updated before SCHECK_PARTIAL. */
878       SCHECK_PARTIAL();
879       RRETURN(MATCH_NOMATCH);
880       }
881     Feptr++;
882 #ifdef SUPPORT_UNICODE
883     if (utf) ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
884 #endif
885     Fecode++;
886     break;
887 
888 
889     /* ===================================================================== */
890     /* Match a single code unit, even in UTF mode. This opcode really does
891     match any code unit, even newline. (It really should be called ANYCODEUNIT,
892     of course - the byte name is from pre-16 bit days.) */
893 
894     case OP_ANYBYTE:
895     if (Feptr >= mb->end_subject)   /* DO NOT merge the Feptr++ here; it must */
896       {                             /* not be updated before SCHECK_PARTIAL. */
897       SCHECK_PARTIAL();
898       RRETURN(MATCH_NOMATCH);
899       }
900     Feptr++;
901     Fecode++;
902     break;
903 
904 
905     /* ===================================================================== */
906     /* Match a single character, casefully */
907 
908     case OP_CHAR:
909 #ifdef SUPPORT_UNICODE
910     if (utf)
911       {
912       Flength = 1;
913       Fecode++;
914       GETCHARLEN(fc, Fecode, Flength);
915       if (Flength > (PCRE2_SIZE)(mb->end_subject - Feptr))
916         {
917         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
918         RRETURN(MATCH_NOMATCH);
919         }
920       for (; Flength > 0; Flength--)
921         {
922         if (*Fecode++ != UCHAR21INC(Feptr)) RRETURN(MATCH_NOMATCH);
923         }
924       }
925     else
926 #endif
927     /* Not UTF mode */
928       {
929       if (mb->end_subject - Feptr < 1)
930         {
931         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
932         RRETURN(MATCH_NOMATCH);
933         }
934       if (Fecode[1] != *Feptr++) RRETURN(MATCH_NOMATCH);
935       Fecode += 2;
936       }
937     break;
938 
939 
940     /* ===================================================================== */
941     /* Match a single character, caselessly. If we are at the end of the
942     subject, give up immediately. We get here only when the pattern character
943     has at most one other case. Characters with more than two cases are coded
944     as OP_PROP with the pseudo-property PT_CLIST. */
945 
946     case OP_CHARI:
947     if (Feptr >= mb->end_subject)
948       {
949       SCHECK_PARTIAL();
950       RRETURN(MATCH_NOMATCH);
951       }
952 
953 #ifdef SUPPORT_UNICODE
954     if (utf)
955       {
956       Flength = 1;
957       Fecode++;
958       GETCHARLEN(fc, Fecode, Flength);
959 
960       /* If the pattern character's value is < 128, we know that its other case
961       (if any) is also < 128 (and therefore only one code unit long in all
962       code-unit widths), so we can use the fast lookup table. We checked above
963       that there is at least one character left in the subject. */
964 
965       if (fc < 128)
966         {
967         uint32_t cc = UCHAR21(Feptr);
968         if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
969         Fecode++;
970         Feptr++;
971         }
972 
973       /* Otherwise we must pick up the subject character and use Unicode
974       property support to test its other case. Note that we cannot use the
975       value of "Flength" to check for sufficient code units left, because the other
976       case of the character may have more or fewer code units. */
977 
978       else
979         {
980         uint32_t dc;
981         GETCHARINC(dc, Feptr);
982         Fecode += Flength;
983         if (dc != fc && dc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
984         }
985       }
986     else
987 #endif   /* SUPPORT_UNICODE */
988 
989     /* Not UTF mode; use the table for characters < 256. */
990       {
991       if (TABLE_GET(Fecode[1], mb->lcc, Fecode[1])
992           != TABLE_GET(*Feptr, mb->lcc, *Feptr)) RRETURN(MATCH_NOMATCH);
993       Feptr++;
994       Fecode += 2;
995       }
996     break;
997 
998 
999     /* ===================================================================== */
1000     /* Match not a single character. */
1001 
1002     case OP_NOT:
1003     case OP_NOTI:
1004     if (Feptr >= mb->end_subject)
1005       {
1006       SCHECK_PARTIAL();
1007       RRETURN(MATCH_NOMATCH);
1008       }
1009 #ifdef SUPPORT_UNICODE
1010     if (utf)
1011       {
1012       uint32_t ch;
1013       Fecode++;
1014       GETCHARINC(ch, Fecode);
1015       GETCHARINC(fc, Feptr);
1016       if (ch == fc)
1017         {
1018         RRETURN(MATCH_NOMATCH);  /* Caseful match */
1019         }
1020       else if (Fop == OP_NOTI)   /* If caseless */
1021         {
1022         if (ch > 127)
1023           ch = UCD_OTHERCASE(ch);
1024         else
1025           ch = TABLE_GET(ch, mb->fcc, ch);
1026         if (ch == fc) RRETURN(MATCH_NOMATCH);
1027         }
1028       }
1029     else
1030 #endif  /* SUPPORT_UNICODE */
1031       {
1032       uint32_t ch = Fecode[1];
1033       fc = *Feptr++;
1034       if (ch == fc || (Fop == OP_NOTI && TABLE_GET(ch, mb->fcc, ch) == fc))
1035         RRETURN(MATCH_NOMATCH);
1036       Fecode += 2;
1037       }
1038     break;
1039 
1040 
1041     /* ===================================================================== */
1042     /* Match a single character repeatedly. */
1043 
1044 #define Loclength    F->temp_size
1045 #define Lstart_eptr  F->temp_sptr[0]
1046 #define Lcharptr     F->temp_sptr[1]
1047 #define Lmin         F->temp_32[0]
1048 #define Lmax         F->temp_32[1]
1049 #define Lc           F->temp_32[2]
1050 #define Loc          F->temp_32[3]
1051 
1052     case OP_EXACT:
1053     case OP_EXACTI:
1054     Lmin = Lmax = GET2(Fecode, 1);
1055     Fecode += 1 + IMM2_SIZE;
1056     goto REPEATCHAR;
1057 
1058     case OP_POSUPTO:
1059     case OP_POSUPTOI:
1060     reptype = REPTYPE_POS;
1061     Lmin = 0;
1062     Lmax = GET2(Fecode, 1);
1063     Fecode += 1 + IMM2_SIZE;
1064     goto REPEATCHAR;
1065 
1066     case OP_UPTO:
1067     case OP_UPTOI:
1068     reptype = REPTYPE_MAX;
1069     Lmin = 0;
1070     Lmax = GET2(Fecode, 1);
1071     Fecode += 1 + IMM2_SIZE;
1072     goto REPEATCHAR;
1073 
1074     case OP_MINUPTO:
1075     case OP_MINUPTOI:
1076     reptype = REPTYPE_MIN;
1077     Lmin = 0;
1078     Lmax = GET2(Fecode, 1);
1079     Fecode += 1 + IMM2_SIZE;
1080     goto REPEATCHAR;
1081 
1082     case OP_POSSTAR:
1083     case OP_POSSTARI:
1084     reptype = REPTYPE_POS;
1085     Lmin = 0;
1086     Lmax = UINT32_MAX;
1087     Fecode++;
1088     goto REPEATCHAR;
1089 
1090     case OP_POSPLUS:
1091     case OP_POSPLUSI:
1092     reptype = REPTYPE_POS;
1093     Lmin = 1;
1094     Lmax = UINT32_MAX;
1095     Fecode++;
1096     goto REPEATCHAR;
1097 
1098     case OP_POSQUERY:
1099     case OP_POSQUERYI:
1100     reptype = REPTYPE_POS;
1101     Lmin = 0;
1102     Lmax = 1;
1103     Fecode++;
1104     goto REPEATCHAR;
1105 
1106     case OP_STAR:
1107     case OP_STARI:
1108     case OP_MINSTAR:
1109     case OP_MINSTARI:
1110     case OP_PLUS:
1111     case OP_PLUSI:
1112     case OP_MINPLUS:
1113     case OP_MINPLUSI:
1114     case OP_QUERY:
1115     case OP_QUERYI:
1116     case OP_MINQUERY:
1117     case OP_MINQUERYI:
1118     fc = *Fecode++ - ((Fop < OP_STARI)? OP_STAR : OP_STARI);
1119     Lmin = rep_min[fc];
1120     Lmax = rep_max[fc];
1121     reptype = rep_typ[fc];
1122 
1123     /* Common code for all repeated single-character matches. We first check
1124     for the minimum number of characters. If the minimum equals the maximum, we
1125     are done. Otherwise, if minimizing, check the rest of the pattern for a
1126     match; if there isn't one, advance up to the maximum, one character at a
1127     time.
1128 
1129     If maximizing, advance up to the maximum number of matching characters,
1130     until Feptr is past the end of the maximum run. If possessive, we are
1131     then done (no backing up). Otherwise, match at this position; anything
1132     other than no match is immediately returned. For nomatch, back up one
1133     character, unless we are matching \R and the last thing matched was
1134     \r\n, in which case, back up two code units until we reach the first
1135     optional character position.
1136 
1137     The various UTF/non-UTF and caseful/caseless cases are handled separately,
1138     for speed. */
1139 
1140     REPEATCHAR:
1141 #ifdef SUPPORT_UNICODE
1142     if (utf)
1143       {
1144       Flength = 1;
1145       Lcharptr = Fecode;
1146       GETCHARLEN(fc, Fecode, Flength);
1147       Fecode += Flength;
1148 
1149       /* Handle multi-code-unit character matching, caseful and caseless. */
1150 
1151       if (Flength > 1)
1152         {
1153         uint32_t othercase;
1154 
1155         if (Fop >= OP_STARI &&     /* Caseless */
1156             (othercase = UCD_OTHERCASE(fc)) != fc)
1157           Loclength = PRIV(ord2utf)(othercase, Foccu);
1158         else Loclength = 0;
1159 
1160         for (i = 1; i <= Lmin; i++)
1161           {
1162           if (Feptr <= mb->end_subject - Flength &&
1163             memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1164           else if (Loclength > 0 &&
1165                    Feptr <= mb->end_subject - Loclength &&
1166                    memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1167             Feptr += Loclength;
1168           else
1169             {
1170             CHECK_PARTIAL();
1171             RRETURN(MATCH_NOMATCH);
1172             }
1173           }
1174 
1175         if (Lmin == Lmax) continue;
1176 
1177         if (reptype == REPTYPE_MIN)
1178           {
1179           for (;;)
1180             {
1181             RMATCH(Fecode, RM202);
1182             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1183             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1184             if (Feptr <= mb->end_subject - Flength &&
1185               memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1186             else if (Loclength > 0 &&
1187                      Feptr <= mb->end_subject - Loclength &&
1188                      memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1189               Feptr += Loclength;
1190             else
1191               {
1192               CHECK_PARTIAL();
1193               RRETURN(MATCH_NOMATCH);
1194               }
1195             }
1196           /* Control never gets here */
1197           }
1198 
1199         else  /* Maximize */
1200           {
1201           Lstart_eptr = Feptr;
1202           for (i = Lmin; i < Lmax; i++)
1203             {
1204             if (Feptr <= mb->end_subject - Flength &&
1205                 memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0)
1206               Feptr += Flength;
1207             else if (Loclength > 0 &&
1208                      Feptr <= mb->end_subject - Loclength &&
1209                      memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1210               Feptr += Loclength;
1211             else
1212               {
1213               CHECK_PARTIAL();
1214               break;
1215               }
1216             }
1217 
1218           /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1219           Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1220           go too far. */
1221 
1222           if (reptype != REPTYPE_POS) for(;;)
1223             {
1224             if (Feptr <= Lstart_eptr) break;
1225             RMATCH(Fecode, RM203);
1226             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1227             Feptr--;
1228             BACKCHAR(Feptr);
1229             }
1230           }
1231         break;   /* End of repeated wide character handling */
1232         }
1233 
1234       /* Length of UTF character is 1. Put it into the preserved variable and
1235       fall through to the non-UTF code. */
1236 
1237       Lc = fc;
1238       }
1239     else
1240 #endif  /* SUPPORT_UNICODE */
1241 
1242     /* When not in UTF mode, load a single-code-unit character. Then proceed as
1243     above. */
1244 
1245     Lc = *Fecode++;
1246 
1247     /* Caseless comparison */
1248 
1249     if (Fop >= OP_STARI)
1250       {
1251 #if PCRE2_CODE_UNIT_WIDTH == 8
1252       /* Lc must be < 128 in UTF-8 mode. */
1253       Loc = mb->fcc[Lc];
1254 #else /* 16-bit & 32-bit */
1255 #ifdef SUPPORT_UNICODE
1256       if (utf && Lc > 127) Loc = UCD_OTHERCASE(Lc);
1257       else
1258 #endif  /* SUPPORT_UNICODE */
1259       Loc = TABLE_GET(Lc, mb->fcc, Lc);
1260 #endif  /* PCRE2_CODE_UNIT_WIDTH == 8 */
1261 
1262       for (i = 1; i <= Lmin; i++)
1263         {
1264         uint32_t cc;                 /* Faster than PCRE2_UCHAR */
1265         if (Feptr >= mb->end_subject)
1266           {
1267           SCHECK_PARTIAL();
1268           RRETURN(MATCH_NOMATCH);
1269           }
1270         cc = UCHAR21TEST(Feptr);
1271         if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1272         Feptr++;
1273         }
1274       if (Lmin == Lmax) continue;
1275 
1276       if (reptype == REPTYPE_MIN)
1277         {
1278         for (;;)
1279           {
1280           uint32_t cc;               /* Faster than PCRE2_UCHAR */
1281           RMATCH(Fecode, RM25);
1282           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1283           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1284           if (Feptr >= mb->end_subject)
1285             {
1286             SCHECK_PARTIAL();
1287             RRETURN(MATCH_NOMATCH);
1288             }
1289           cc = UCHAR21TEST(Feptr);
1290           if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1291           Feptr++;
1292           }
1293         /* Control never gets here */
1294         }
1295 
1296       else  /* Maximize */
1297         {
1298         Lstart_eptr = Feptr;
1299         for (i = Lmin; i < Lmax; i++)
1300           {
1301           uint32_t cc;               /* Faster than PCRE2_UCHAR */
1302           if (Feptr >= mb->end_subject)
1303             {
1304             SCHECK_PARTIAL();
1305             break;
1306             }
1307           cc = UCHAR21TEST(Feptr);
1308           if (Lc != cc && Loc != cc) break;
1309           Feptr++;
1310           }
1311         if (reptype != REPTYPE_POS) for (;;)
1312           {
1313           if (Feptr == Lstart_eptr) break;
1314           RMATCH(Fecode, RM26);
1315           Feptr--;
1316           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1317           }
1318         }
1319       }
1320 
1321     /* Caseful comparisons (includes all multi-byte characters) */
1322 
1323     else
1324       {
1325       for (i = 1; i <= Lmin; i++)
1326         {
1327         if (Feptr >= mb->end_subject)
1328           {
1329           SCHECK_PARTIAL();
1330           RRETURN(MATCH_NOMATCH);
1331           }
1332         if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1333         }
1334 
1335       if (Lmin == Lmax) continue;
1336 
1337       if (reptype == REPTYPE_MIN)
1338         {
1339         for (;;)
1340           {
1341           RMATCH(Fecode, RM27);
1342           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1343           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1344           if (Feptr >= mb->end_subject)
1345             {
1346             SCHECK_PARTIAL();
1347             RRETURN(MATCH_NOMATCH);
1348             }
1349           if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1350           }
1351         /* Control never gets here */
1352         }
1353       else  /* Maximize */
1354         {
1355         Lstart_eptr = Feptr;
1356         for (i = Lmin; i < Lmax; i++)
1357           {
1358           if (Feptr >= mb->end_subject)
1359             {
1360             SCHECK_PARTIAL();
1361             break;
1362             }
1363 
1364           if (Lc != UCHAR21TEST(Feptr)) break;
1365           Feptr++;
1366           }
1367 
1368         if (reptype != REPTYPE_POS) for (;;)
1369           {
1370           if (Feptr <= Lstart_eptr) break;
1371           RMATCH(Fecode, RM28);
1372           Feptr--;
1373           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1374           }
1375         }
1376       }
1377     break;
1378 
1379 #undef Loclength
1380 #undef Lstart_eptr
1381 #undef Lcharptr
1382 #undef Lmin
1383 #undef Lmax
1384 #undef Lc
1385 #undef Loc
1386 
1387 
1388     /* ===================================================================== */
1389     /* Match a negated single character repeatedly. This is almost a
1390     repeat of the code for a repeated single character, but I haven't found a
1391     nice way of commoning these up that doesn't require a test of the
1392     positive/negative option for each character match. Maybe that wouldn't add
1393     very much to the time taken, but character matching *is* what this is all
1394     about... */
1395 
1396 #define Lstart_eptr  F->temp_sptr[0]
1397 #define Lmin         F->temp_32[0]
1398 #define Lmax         F->temp_32[1]
1399 #define Lc           F->temp_32[2]
1400 #define Loc          F->temp_32[3]
1401 
1402     case OP_NOTEXACT:
1403     case OP_NOTEXACTI:
1404     Lmin = Lmax = GET2(Fecode, 1);
1405     Fecode += 1 + IMM2_SIZE;
1406     goto REPEATNOTCHAR;
1407 
1408     case OP_NOTUPTO:
1409     case OP_NOTUPTOI:
1410     Lmin = 0;
1411     Lmax = GET2(Fecode, 1);
1412     reptype = REPTYPE_MAX;
1413     Fecode += 1 + IMM2_SIZE;
1414     goto REPEATNOTCHAR;
1415 
1416     case OP_NOTMINUPTO:
1417     case OP_NOTMINUPTOI:
1418     Lmin = 0;
1419     Lmax = GET2(Fecode, 1);
1420     reptype = REPTYPE_MIN;
1421     Fecode += 1 + IMM2_SIZE;
1422     goto REPEATNOTCHAR;
1423 
1424     case OP_NOTPOSSTAR:
1425     case OP_NOTPOSSTARI:
1426     reptype = REPTYPE_POS;
1427     Lmin = 0;
1428     Lmax = UINT32_MAX;
1429     Fecode++;
1430     goto REPEATNOTCHAR;
1431 
1432     case OP_NOTPOSPLUS:
1433     case OP_NOTPOSPLUSI:
1434     reptype = REPTYPE_POS;
1435     Lmin = 1;
1436     Lmax = UINT32_MAX;
1437     Fecode++;
1438     goto REPEATNOTCHAR;
1439 
1440     case OP_NOTPOSQUERY:
1441     case OP_NOTPOSQUERYI:
1442     reptype = REPTYPE_POS;
1443     Lmin = 0;
1444     Lmax = 1;
1445     Fecode++;
1446     goto REPEATNOTCHAR;
1447 
1448     case OP_NOTPOSUPTO:
1449     case OP_NOTPOSUPTOI:
1450     reptype = REPTYPE_POS;
1451     Lmin = 0;
1452     Lmax = GET2(Fecode, 1);
1453     Fecode += 1 + IMM2_SIZE;
1454     goto REPEATNOTCHAR;
1455 
1456     case OP_NOTSTAR:
1457     case OP_NOTSTARI:
1458     case OP_NOTMINSTAR:
1459     case OP_NOTMINSTARI:
1460     case OP_NOTPLUS:
1461     case OP_NOTPLUSI:
1462     case OP_NOTMINPLUS:
1463     case OP_NOTMINPLUSI:
1464     case OP_NOTQUERY:
1465     case OP_NOTQUERYI:
1466     case OP_NOTMINQUERY:
1467     case OP_NOTMINQUERYI:
1468     fc = *Fecode++ - ((Fop >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
1469     Lmin = rep_min[fc];
1470     Lmax = rep_max[fc];
1471     reptype = rep_typ[fc];
1472 
1473     /* Common code for all repeated single-character non-matches. */
1474 
1475     REPEATNOTCHAR:
1476     GETCHARINCTEST(Lc, Fecode);
1477 
1478     /* The code is duplicated for the caseless and caseful cases, for speed,
1479     since matching characters is likely to be quite common. First, ensure the
1480     minimum number of matches are present. If Lmin = Lmax, we are done.
1481     Otherwise, if minimizing, keep trying the rest of the expression and
1482     advancing one matching character if failing, up to the maximum.
1483     Alternatively, if maximizing, find the maximum number of characters and
1484     work backwards. */
1485 
1486     if (Fop >= OP_NOTSTARI)     /* Caseless */
1487       {
1488 #ifdef SUPPORT_UNICODE
1489       if (utf && Lc > 127)
1490         Loc = UCD_OTHERCASE(Lc);
1491       else
1492 #endif /* SUPPORT_UNICODE */
1493 
1494       Loc = TABLE_GET(Lc, mb->fcc, Lc);  /* Other case from table */
1495 
1496 #ifdef SUPPORT_UNICODE
1497       if (utf)
1498         {
1499         uint32_t d;
1500         for (i = 1; i <= Lmin; i++)
1501           {
1502           if (Feptr >= mb->end_subject)
1503             {
1504             SCHECK_PARTIAL();
1505             RRETURN(MATCH_NOMATCH);
1506             }
1507           GETCHARINC(d, Feptr);
1508           if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1509           }
1510         }
1511       else
1512 #endif  /* SUPPORT_UNICODE */
1513 
1514       /* Not UTF mode */
1515         {
1516         for (i = 1; i <= Lmin; i++)
1517           {
1518           if (Feptr >= mb->end_subject)
1519             {
1520             SCHECK_PARTIAL();
1521             RRETURN(MATCH_NOMATCH);
1522             }
1523           if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1524           Feptr++;
1525           }
1526         }
1527 
1528       if (Lmin == Lmax) continue;  /* Finished for exact count */
1529 
1530       if (reptype == REPTYPE_MIN)
1531         {
1532 #ifdef SUPPORT_UNICODE
1533         if (utf)
1534           {
1535           uint32_t d;
1536           for (;;)
1537             {
1538             RMATCH(Fecode, RM204);
1539             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1540             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1541             if (Feptr >= mb->end_subject)
1542               {
1543               SCHECK_PARTIAL();
1544               RRETURN(MATCH_NOMATCH);
1545               }
1546             GETCHARINC(d, Feptr);
1547             if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1548             }
1549           }
1550         else
1551 #endif  /*SUPPORT_UNICODE */
1552 
1553         /* Not UTF mode */
1554           {
1555           for (;;)
1556             {
1557             RMATCH(Fecode, RM29);
1558             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1559             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1560             if (Feptr >= mb->end_subject)
1561               {
1562               SCHECK_PARTIAL();
1563               RRETURN(MATCH_NOMATCH);
1564               }
1565             if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1566             Feptr++;
1567             }
1568           }
1569         /* Control never gets here */
1570         }
1571 
1572       /* Maximize case */
1573 
1574       else
1575         {
1576         Lstart_eptr = Feptr;
1577 
1578 #ifdef SUPPORT_UNICODE
1579         if (utf)
1580           {
1581           uint32_t d;
1582           for (i = Lmin; i < Lmax; i++)
1583             {
1584             int len = 1;
1585             if (Feptr >= mb->end_subject)
1586               {
1587               SCHECK_PARTIAL();
1588               break;
1589               }
1590             GETCHARLEN(d, Feptr, len);
1591             if (Lc == d || Loc == d) break;
1592             Feptr += len;
1593             }
1594 
1595           /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1596           Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1597           go too far. */
1598 
1599           if (reptype != REPTYPE_POS) for(;;)
1600             {
1601             if (Feptr <= Lstart_eptr) break;
1602             RMATCH(Fecode, RM205);
1603             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1604             Feptr--;
1605             BACKCHAR(Feptr);
1606             }
1607           }
1608         else
1609 #endif  /* SUPPORT_UNICODE */
1610 
1611         /* Not UTF mode */
1612           {
1613           for (i = Lmin; i < Lmax; i++)
1614             {
1615             if (Feptr >= mb->end_subject)
1616               {
1617               SCHECK_PARTIAL();
1618               break;
1619               }
1620             if (Lc == *Feptr || Loc == *Feptr) break;
1621             Feptr++;
1622             }
1623           if (reptype != REPTYPE_POS) for (;;)
1624             {
1625             if (Feptr == Lstart_eptr) break;
1626             RMATCH(Fecode, RM30);
1627             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1628             Feptr--;
1629             }
1630           }
1631         }
1632       }
1633 
1634     /* Caseful comparisons */
1635 
1636     else
1637       {
1638 #ifdef SUPPORT_UNICODE
1639       if (utf)
1640         {
1641         uint32_t d;
1642         for (i = 1; i <= Lmin; i++)
1643           {
1644           if (Feptr >= mb->end_subject)
1645             {
1646             SCHECK_PARTIAL();
1647             RRETURN(MATCH_NOMATCH);
1648             }
1649           GETCHARINC(d, Feptr);
1650           if (Lc == d) RRETURN(MATCH_NOMATCH);
1651           }
1652         }
1653       else
1654 #endif
1655       /* Not UTF mode */
1656         {
1657         for (i = 1; i <= Lmin; i++)
1658           {
1659           if (Feptr >= mb->end_subject)
1660             {
1661             SCHECK_PARTIAL();
1662             RRETURN(MATCH_NOMATCH);
1663             }
1664           if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1665           }
1666         }
1667 
1668       if (Lmin == Lmax) continue;
1669 
1670       if (reptype == REPTYPE_MIN)
1671         {
1672 #ifdef SUPPORT_UNICODE
1673         if (utf)
1674           {
1675           uint32_t d;
1676           for (;;)
1677             {
1678             RMATCH(Fecode, RM206);
1679             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1680             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1681             if (Feptr >= mb->end_subject)
1682               {
1683               SCHECK_PARTIAL();
1684               RRETURN(MATCH_NOMATCH);
1685               }
1686             GETCHARINC(d, Feptr);
1687             if (Lc == d) RRETURN(MATCH_NOMATCH);
1688             }
1689           }
1690         else
1691 #endif
1692         /* Not UTF mode */
1693           {
1694           for (;;)
1695             {
1696             RMATCH(Fecode, RM31);
1697             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1698             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1699             if (Feptr >= mb->end_subject)
1700               {
1701               SCHECK_PARTIAL();
1702               RRETURN(MATCH_NOMATCH);
1703               }
1704             if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1705             }
1706           }
1707         /* Control never gets here */
1708         }
1709 
1710       /* Maximize case */
1711 
1712       else
1713         {
1714         Lstart_eptr = Feptr;
1715 
1716 #ifdef SUPPORT_UNICODE
1717         if (utf)
1718           {
1719           uint32_t d;
1720           for (i = Lmin; i < Lmax; i++)
1721             {
1722             int len = 1;
1723             if (Feptr >= mb->end_subject)
1724               {
1725               SCHECK_PARTIAL();
1726               break;
1727               }
1728             GETCHARLEN(d, Feptr, len);
1729             if (Lc == d) break;
1730             Feptr += len;
1731             }
1732 
1733           /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1734           Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1735           go too far. */
1736 
1737           if (reptype != REPTYPE_POS) for(;;)
1738             {
1739             if (Feptr <= Lstart_eptr) break;
1740             RMATCH(Fecode, RM207);
1741             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1742             Feptr--;
1743             BACKCHAR(Feptr);
1744             }
1745           }
1746         else
1747 #endif
1748         /* Not UTF mode */
1749           {
1750           for (i = Lmin; i < Lmax; i++)
1751             {
1752             if (Feptr >= mb->end_subject)
1753               {
1754               SCHECK_PARTIAL();
1755               break;
1756               }
1757             if (Lc == *Feptr) break;
1758             Feptr++;
1759             }
1760           if (reptype != REPTYPE_POS) for (;;)
1761             {
1762             if (Feptr == Lstart_eptr) break;
1763             RMATCH(Fecode, RM32);
1764             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1765             Feptr--;
1766             }
1767           }
1768         }
1769       }
1770     break;
1771 
1772 #undef Lstart_eptr
1773 #undef Lmin
1774 #undef Lmax
1775 #undef Lc
1776 #undef Loc
1777 
1778 
1779     /* ===================================================================== */
1780     /* Match a bit-mapped character class, possibly repeatedly. These opcodes
1781     are used when all the characters in the class have values in the range
1782     0-255, and either the matching is caseful, or the characters are in the
1783     range 0-127 when UTF processing is enabled. The only difference between
1784     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1785     encountered. */
1786 
1787 #define Lmin               F->temp_32[0]
1788 #define Lmax               F->temp_32[1]
1789 #define Lstart_eptr        F->temp_sptr[0]
1790 #define Lbyte_map_address  F->temp_sptr[1]
1791 #define Lbyte_map          ((unsigned char *)Lbyte_map_address)
1792 
1793     case OP_NCLASS:
1794     case OP_CLASS:
1795       {
1796       Lbyte_map_address = Fecode + 1;           /* Save for matching */
1797       Fecode += 1 + (32 / sizeof(PCRE2_UCHAR)); /* Skip opcode + 32-byte map */
1798
1799       /* Look past the end of the item to see if there is repeat information
1800       following. Then obey similar code to character type repeats. */
1801
1802       switch (*Fecode)
1803         {
1804         case OP_CRSTAR:
1805         case OP_CRMINSTAR:
1806         case OP_CRPLUS:
1807         case OP_CRMINPLUS:
1808         case OP_CRQUERY:
1809         case OP_CRMINQUERY:
1810         case OP_CRPOSSTAR:
1811         case OP_CRPOSPLUS:
1812         case OP_CRPOSQUERY:
1813         fc = *Fecode++ - OP_CRSTAR;             /* Index into the rep_ tables */
1814         Lmin = rep_min[fc];
1815         Lmax = rep_max[fc];
1816         reptype = rep_typ[fc];
1817         break;
1818
1819         case OP_CRRANGE:
1820         case OP_CRMINRANGE:
1821         case OP_CRPOSRANGE:
1822         Lmin = GET2(Fecode, 1);                 /* Explicit minimum count */
1823         Lmax = GET2(Fecode, 1 + IMM2_SIZE);     /* Explicit maximum count */
1824         if (Lmax == 0) Lmax = UINT32_MAX;       /* Max 0 => infinity */
1825         reptype = rep_typ[*Fecode - OP_CRSTAR];
1826         Fecode += 1 + 2 * IMM2_SIZE;            /* Skip opcode and both counts */
1827         break;
1828
1829         default:               /* No repeat follows; reptype is not set here */
1830         Lmin = Lmax = 1;       /* Lmin == Lmax exits below before reptype is read */
1831         break;
1832         }
1833
1834       /* First, ensure the minimum number of matches are present. */
1835
1836 #ifdef SUPPORT_UNICODE
1837       if (utf)
1838         {
1839         for (i = 1; i <= Lmin; i++)
1840           {
1841           if (Feptr >= mb->end_subject)
1842             {
1843             SCHECK_PARTIAL();
1844             RRETURN(MATCH_NOMATCH);
1845             }
1846           GETCHARINC(fc, Feptr);
1847           if (fc > 255)   /* No bit in the map for this character */
1848             {
1849             if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);   /* OP_NCLASS accepts it */
1850             }
1851           else   /* Map bit for fc clear => character is not in the class */
1852             if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
1853           }
1854         }
1855       else
1856 #endif
1857       /* Not UTF mode */
1858         {
1859         for (i = 1; i <= Lmin; i++)
1860           {
1861           if (Feptr >= mb->end_subject)
1862             {
1863             SCHECK_PARTIAL();
1864             RRETURN(MATCH_NOMATCH);
1865             }
1866           fc = *Feptr++;
1867 #if PCRE2_CODE_UNIT_WIDTH != 8
1868           if (fc > 255)   /* Only possible when code units are wider than 8 bits */
1869             {
1870             if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
1871             }
1872           else
1873 #endif
1874           if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
1875           }
1876         }
1877
1878       /* If Lmax == Lmin we are done. Continue with main loop. */
1879
1880       if (Lmin == Lmax) continue;
1881
1882       /* If minimizing, keep testing the rest of the expression and advancing
1883       the pointer while it matches the class. */
1884
1885       if (reptype == REPTYPE_MIN)
1886         {
1887 #ifdef SUPPORT_UNICODE
1888         if (utf)
1889           {
1890           for (;;)
1891             {
1892             RMATCH(Fecode, RM200);   /* Try the rest of the pattern first */
1893             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1894             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);   /* Lmin counts matches so far */
1895             if (Feptr >= mb->end_subject)
1896               {
1897               SCHECK_PARTIAL();
1898               RRETURN(MATCH_NOMATCH);
1899               }
1900             GETCHARINC(fc, Feptr);
1901             if (fc > 255)
1902               {
1903               if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
1904               }
1905             else
1906               if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
1907             }
1908           }
1909         else
1910 #endif
1911         /* Not UTF mode */
1912           {
1913           for (;;)
1914             {
1915             RMATCH(Fecode, RM23);   /* Try the rest of the pattern first */
1916             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1917             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);   /* Lmin counts matches so far */
1918             if (Feptr >= mb->end_subject)
1919               {
1920               SCHECK_PARTIAL();
1921               RRETURN(MATCH_NOMATCH);
1922               }
1923             fc = *Feptr++;
1924 #if PCRE2_CODE_UNIT_WIDTH != 8
1925             if (fc > 255)
1926               {
1927               if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
1928               }
1929             else
1930 #endif
1931             if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
1932             }
1933           }
1934         /* Control never gets here */
1935         }
1936
1937       /* If maximizing, find the longest possible run, then work backwards. */
1938
1939       else
1940         {
1941         Lstart_eptr = Feptr;   /* Position after the mandatory Lmin characters */
1942
1943 #ifdef SUPPORT_UNICODE
1944         if (utf)
1945           {
1946           for (i = Lmin; i < Lmax; i++)   /* At most Lmax - Lmin extra characters */
1947             {
1948             int len = 1;
1949             if (Feptr >= mb->end_subject)
1950               {
1951               SCHECK_PARTIAL();
1952               break;
1953               }
1954             GETCHARLEN(fc, Feptr, len);   /* Feptr is advanced below, only on a match */
1955             if (fc > 255)
1956               {
1957               if (Fop == OP_CLASS) break;
1958               }
1959             else
1960               if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
1961             Feptr += len;
1962             }
1963
1964           if (reptype == REPTYPE_POS) continue;    /* No backtracking */
1965
1966           /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1967           Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1968           go too far. */
1969
1970           for (;;)
1971             {
1972             RMATCH(Fecode, RM201);
1973             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1974             if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
1975             BACKCHAR(Feptr);   /* Presumably moves to the start of the character - confirm */
1976             }
1977           }
1978         else
1979 #endif
1980           /* Not UTF mode */
1981           {
1982           for (i = Lmin; i < Lmax; i++)   /* At most Lmax - Lmin extra code units */
1983             {
1984             if (Feptr >= mb->end_subject)
1985               {
1986               SCHECK_PARTIAL();
1987               break;
1988               }
1989             fc = *Feptr;   /* Feptr is advanced below, only on a match */
1990 #if PCRE2_CODE_UNIT_WIDTH != 8
1991             if (fc > 255)
1992               {
1993               if (Fop == OP_CLASS) break;
1994               }
1995             else
1996 #endif
1997             if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
1998             Feptr++;
1999             }
2000
2001           if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2002
2003           while (Feptr >= Lstart_eptr)   /* Longest run first, back to the start inclusive */
2004             {
2005             RMATCH(Fecode, RM24);
2006             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2007             Feptr--;
2008             }
2009           }
2010
2011         RRETURN(MATCH_NOMATCH);   /* Every candidate run length failed */
2012         }
2013       }
2014     /* Control never gets here */
2015 
2016 #undef Lbyte_map_address
2017 #undef Lbyte_map
2018 #undef Lstart_eptr
2019 #undef Lmin
2020 #undef Lmax
2021 
2022 
2023     /* ===================================================================== */
2024     /* Match an extended character class. In the 8-bit library, this opcode is
2025     encountered only when UTF-8 mode is supported. In the 16-bit and
2026     32-bit libraries, codepoints greater than 255 may be encountered even when
2027     UTF is not supported. */
2028 
2029 #define Lstart_eptr  F->temp_sptr[0]
2030 #define Lxclass_data F->temp_sptr[1]
2031 #define Lmin         F->temp_32[0]
2032 #define Lmax         F->temp_32[1]
2033 
2034 #ifdef SUPPORT_WIDE_CHARS
2035     case OP_XCLASS:
2036       {
2037       Lxclass_data = Fecode + 1 + LINK_SIZE;  /* Save for matching */
2038       Fecode += GET(Fecode, 1);               /* Advance past the item */
2039
2040       switch (*Fecode)   /* Is the class followed by a repeat opcode? */
2041         {
2042         case OP_CRSTAR:
2043         case OP_CRMINSTAR:
2044         case OP_CRPLUS:
2045         case OP_CRMINPLUS:
2046         case OP_CRQUERY:
2047         case OP_CRMINQUERY:
2048         case OP_CRPOSSTAR:
2049         case OP_CRPOSPLUS:
2050         case OP_CRPOSQUERY:
2051         fc = *Fecode++ - OP_CRSTAR;        /* Index into the rep_ tables */
2052         Lmin = rep_min[fc];
2053         Lmax = rep_max[fc];
2054         reptype = rep_typ[fc];
2055         break;
2056
2057         case OP_CRRANGE:
2058         case OP_CRMINRANGE:
2059         case OP_CRPOSRANGE:
2060         Lmin = GET2(Fecode, 1);            /* Explicit minimum count */
2061         Lmax = GET2(Fecode, 1 + IMM2_SIZE);   /* Explicit maximum count */
2062         if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
2063         reptype = rep_typ[*Fecode - OP_CRSTAR];
2064         Fecode += 1 + 2 * IMM2_SIZE;       /* Skip opcode and both counts */
2065         break;
2066
2067         default:               /* No repeat follows; reptype is not set here */
2068         Lmin = Lmax = 1;       /* Lmin == Lmax exits below before reptype is read */
2069         break;
2070         }
2071
2072       /* First, ensure the minimum number of matches are present. */
2073
2074       for (i = 1; i <= Lmin; i++)
2075         {
2076         if (Feptr >= mb->end_subject)
2077           {
2078           SCHECK_PARTIAL();
2079           RRETURN(MATCH_NOMATCH);
2080           }
2081         GETCHARINCTEST(fc, Feptr);
2082         if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH);   /* Not in class */
2083         }
2084
2085       /* If Lmax == Lmin we can just continue with the main loop. */
2086
2087       if (Lmin == Lmax) continue;
2088
2089       /* If minimizing, keep testing the rest of the expression and advancing
2090       the pointer while it matches the class. */
2091
2092       if (reptype == REPTYPE_MIN)
2093         {
2094         for (;;)
2095           {
2096           RMATCH(Fecode, RM100);   /* Try the rest of the pattern first */
2097           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2098           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);   /* Lmin counts matches so far */
2099           if (Feptr >= mb->end_subject)
2100             {
2101             SCHECK_PARTIAL();
2102             RRETURN(MATCH_NOMATCH);
2103             }
2104           GETCHARINCTEST(fc, Feptr);
2105           if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH);
2106           }
2107         /* Control never gets here */
2108         }
2109
2110       /* If maximizing, find the longest possible run, then work backwards. */
2111
2112       else
2113         {
2114         Lstart_eptr = Feptr;   /* Position after the mandatory Lmin characters */
2115         for (i = Lmin; i < Lmax; i++)   /* At most Lmax - Lmin extra characters */
2116           {
2117           int len = 1;
2118           if (Feptr >= mb->end_subject)
2119             {
2120             SCHECK_PARTIAL();
2121             break;
2122             }
2123 #ifdef SUPPORT_UNICODE
2124           GETCHARLENTEST(fc, Feptr, len);
2125 #else
2126           fc = *Feptr;
2127 #endif
2128           if (!PRIV(xclass)(fc, Lxclass_data, utf)) break;
2129           Feptr += len;   /* Advance only after the character has matched */
2130           }
2131
2132         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2133
2134         /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2135         Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2136         go too far. */
2137
2138         for(;;)
2139           {
2140           RMATCH(Fecode, RM101);
2141           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2142           if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2143 #ifdef SUPPORT_UNICODE
2144           if (utf) BACKCHAR(Feptr);   /* Presumably moves to the start of the character - confirm */
2145 #endif
2146           }
2147         RRETURN(MATCH_NOMATCH);   /* Every candidate run length failed */
2148         }
2149
2150       /* Control never gets here */
2151       }
2152 #endif  /* SUPPORT_WIDE_CHARS: end of XCLASS */
2153 
2154 #undef Lstart_eptr
2155 #undef Lxclass_data
2156 #undef Lmin
2157 #undef Lmax
2158 
2159 
2160     /* ===================================================================== */
2161     /* Match various character types when PCRE2_UCP is not set. These opcodes
2162     are not generated when PCRE2_UCP is set - instead appropriate property
2163     tests are compiled. */
2164 
2165     case OP_NOT_DIGIT:   /* One character whose ctypes entry lacks the digit bit */
2166     if (Feptr >= mb->end_subject)   /* Nothing left to match */
2167       {
2168       SCHECK_PARTIAL();
2169       RRETURN(MATCH_NOMATCH);
2170       }
2171     GETCHARINCTEST(fc, Feptr);
2172     if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)   /* CHMAX_255 presumably guards the table index - confirm */
2173       RRETURN(MATCH_NOMATCH);
2174     Fecode++;   /* Step past this opcode */
2175     break;
2176 
2177     case OP_DIGIT:   /* One character whose ctypes entry has the digit bit */
2178     if (Feptr >= mb->end_subject)   /* Nothing left to match */
2179       {
2180       SCHECK_PARTIAL();
2181       RRETURN(MATCH_NOMATCH);
2182       }
2183     GETCHARINCTEST(fc, Feptr);
2184     if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)   /* Fails too when CHMAX_255(fc) is false */
2185       RRETURN(MATCH_NOMATCH);
2186     Fecode++;   /* Step past this opcode */
2187     break;
2188 
2189     case OP_NOT_WHITESPACE:   /* One character whose ctypes entry lacks the space bit */
2190     if (Feptr >= mb->end_subject)   /* Nothing left to match */
2191       {
2192       SCHECK_PARTIAL();
2193       RRETURN(MATCH_NOMATCH);
2194       }
2195     GETCHARINCTEST(fc, Feptr);
2196     if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)   /* CHMAX_255 presumably guards the table index - confirm */
2197       RRETURN(MATCH_NOMATCH);
2198     Fecode++;   /* Step past this opcode */
2199     break;
2200 
2201     case OP_WHITESPACE:   /* One character whose ctypes entry has the space bit */
2202     if (Feptr >= mb->end_subject)   /* Nothing left to match */
2203       {
2204       SCHECK_PARTIAL();
2205       RRETURN(MATCH_NOMATCH);
2206       }
2207     GETCHARINCTEST(fc, Feptr);
2208     if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)   /* Fails too when CHMAX_255(fc) is false */
2209       RRETURN(MATCH_NOMATCH);
2210     Fecode++;   /* Step past this opcode */
2211     break;
2212 
2213     case OP_NOT_WORDCHAR:   /* One character whose ctypes entry lacks the word bit */
2214     if (Feptr >= mb->end_subject)   /* Nothing left to match */
2215       {
2216       SCHECK_PARTIAL();
2217       RRETURN(MATCH_NOMATCH);
2218       }
2219     GETCHARINCTEST(fc, Feptr);
2220     if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)   /* CHMAX_255 presumably guards the table index - confirm */
2221       RRETURN(MATCH_NOMATCH);
2222     Fecode++;   /* Step past this opcode */
2223     break;
2224 
2225     case OP_WORDCHAR:   /* One character whose ctypes entry has the word bit */
2226     if (Feptr >= mb->end_subject)   /* Nothing left to match */
2227       {
2228       SCHECK_PARTIAL();
2229       RRETURN(MATCH_NOMATCH);
2230       }
2231     GETCHARINCTEST(fc, Feptr);
2232     if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)   /* Fails too when CHMAX_255(fc) is false */
2233       RRETURN(MATCH_NOMATCH);
2234     Fecode++;   /* Step past this opcode */
2235     break;
2236 
2237     case OP_ANYNL:   /* Match one newline sequence; CR LF is consumed as a unit */
2238     if (Feptr >= mb->end_subject)
2239       {
2240       SCHECK_PARTIAL();
2241       RRETURN(MATCH_NOMATCH);
2242       }
2243     GETCHARINCTEST(fc, Feptr);
2244     switch(fc)
2245       {
2246       default: RRETURN(MATCH_NOMATCH);   /* Not one of the characters listed below */
2247
2248       case CHAR_CR:   /* CR alone, or CR immediately followed by LF */
2249       if (Feptr >= mb->end_subject)
2250         {
2251         SCHECK_PARTIAL();   /* CR is last in the subject; the match itself still succeeds */
2252         }
2253       else if (UCHAR21TEST(Feptr) == CHAR_LF) Feptr++;   /* Swallow the LF of CR LF */
2254       break;
2255
2256       case CHAR_LF:
2257       break;
2258
2259       case CHAR_VT:   /* The remaining characters are rejected under PCRE2_BSR_ANYCRLF */
2260       case CHAR_FF:
2261       case CHAR_NEL:
2262 #ifndef EBCDIC
2263       case 0x2028:
2264       case 0x2029:
2265 #endif  /* Not EBCDIC */
2266       if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
2267       break;
2268       }
2269     Fecode++;   /* Step past this opcode */
2270     break;
2271 
2272     case OP_NOT_HSPACE:   /* One character that is not covered by HSPACE_CASES */
2273     if (Feptr >= mb->end_subject)   /* Nothing left to match */
2274       {
2275       SCHECK_PARTIAL();
2276       RRETURN(MATCH_NOMATCH);
2277       }
2278     GETCHARINCTEST(fc, Feptr);
2279     switch(fc)   /* HSPACE_CASES presumably expands to a list of case labels - confirm */
2280       {
2281       HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2282       default: break;
2283       }
2284     Fecode++;   /* Step past this opcode */
2285     break;
2286 
2287     case OP_HSPACE:   /* One character that is covered by HSPACE_CASES */
2288     if (Feptr >= mb->end_subject)   /* Nothing left to match */
2289       {
2290       SCHECK_PARTIAL();
2291       RRETURN(MATCH_NOMATCH);
2292       }
2293     GETCHARINCTEST(fc, Feptr);
2294     switch(fc)   /* HSPACE_CASES presumably expands to a list of case labels - confirm */
2295       {
2296       HSPACE_CASES: break;  /* Byte and multibyte cases */
2297       default: RRETURN(MATCH_NOMATCH);
2298       }
2299     Fecode++;   /* Step past this opcode */
2300     break;
2301 
2302     case OP_NOT_VSPACE:   /* One character that is not covered by VSPACE_CASES */
2303     if (Feptr >= mb->end_subject)   /* Nothing left to match */
2304       {
2305       SCHECK_PARTIAL();
2306       RRETURN(MATCH_NOMATCH);
2307       }
2308     GETCHARINCTEST(fc, Feptr);
2309     switch(fc)   /* VSPACE_CASES presumably expands to a list of case labels - confirm */
2310       {
2311       VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2312       default: break;
2313       }
2314     Fecode++;   /* Step past this opcode */
2315     break;
2316 
2317     case OP_VSPACE:   /* One character that is covered by VSPACE_CASES */
2318     if (Feptr >= mb->end_subject)   /* Nothing left to match */
2319       {
2320       SCHECK_PARTIAL();
2321       RRETURN(MATCH_NOMATCH);
2322       }
2323     GETCHARINCTEST(fc, Feptr);
2324     switch(fc)   /* VSPACE_CASES presumably expands to a list of case labels - confirm */
2325       {
2326       VSPACE_CASES: break;
2327       default: RRETURN(MATCH_NOMATCH);
2328       }
2329     Fecode++;   /* Step past this opcode */
2330     break;
2331 
2332 
2333 #ifdef SUPPORT_UNICODE
2334 
2335     /* ===================================================================== */
2336     /* Check the next character by Unicode property. We will get here only
2337     if the support is in the binary; otherwise a compile-time error occurs. */
2338 
2339     case OP_PROP:      /* Fecode[1] is the property type, Fecode[2] its value */
2340     case OP_NOTPROP:   /* Same tests with the outcome inverted */
2341     if (Feptr >= mb->end_subject)
2342       {
2343       SCHECK_PARTIAL();
2344       RRETURN(MATCH_NOMATCH);
2345       }
2346     GETCHARINCTEST(fc, Feptr);
2347       {
2348       const uint32_t *cp;
2349       const ucd_record *prop = GET_UCD(fc);   /* Property record looked up for fc */
2350
2351       switch(Fecode[1])
2352         {
2353         case PT_ANY:   /* Every character passes, so only OP_NOTPROP can fail */
2354         if (Fop == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2355         break;
2356
2357         case PT_LAMP:   /* Character type is one of Lu, Ll, Lt */
2358         if ((prop->chartype == ucp_Lu ||
2359              prop->chartype == ucp_Ll ||
2360              prop->chartype == ucp_Lt) == (Fop == OP_NOTPROP))
2361           RRETURN(MATCH_NOMATCH);
2362         break;
2363
2364         case PT_GC:   /* Compare Fecode[2] with the general category of the character type */
2365         if ((Fecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (Fop == OP_PROP))
2366           RRETURN(MATCH_NOMATCH);
2367         break;
2368
2369         case PT_PC:   /* Compare Fecode[2] with the character type itself */
2370         if ((Fecode[2] != prop->chartype) == (Fop == OP_PROP))
2371           RRETURN(MATCH_NOMATCH);
2372         break;
2373
2374         case PT_SC:   /* Compare Fecode[2] with the script */
2375         if ((Fecode[2] != prop->script) == (Fop == OP_PROP))
2376           RRETURN(MATCH_NOMATCH);
2377         break;
2378
2379         /* These are specials */
2380
2381         case PT_ALNUM:   /* General category is L or N */
2382         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2383              PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (Fop == OP_NOTPROP))
2384           RRETURN(MATCH_NOMATCH);
2385         break;
2386
2387         /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2388         which means that Perl space and POSIX space are now identical. PCRE
2389         was changed at release 8.34. */
2390
2391         case PT_SPACE:    /* Perl space */
2392         case PT_PXSPACE:  /* POSIX space */
2393         switch(fc)
2394           {
2395           HSPACE_CASES:
2396           VSPACE_CASES:   /* Listed space characters always count as space */
2397           if (Fop == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2398           break;
2399
2400           default:   /* Otherwise it is space only if the general category is Z */
2401           if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
2402             (Fop == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
2403           break;
2404           }
2405         break;
2406
2407         case PT_WORD:   /* General category L or N, or an underscore */
2408         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2409              PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2410              fc == CHAR_UNDERSCORE) == (Fop == OP_NOTPROP))
2411           RRETURN(MATCH_NOMATCH);
2412         break;
2413
2414         case PT_CLIST:   /* Search the list starting at offset Fecode[2] for fc */
2415         cp = PRIV(ucd_caseless_sets) + Fecode[2];
2416         for (;;)   /* NOTE(review): presumably the list is ascending and ends with a value above any character - confirm */
2417           {
2418           if (fc < *cp)   /* Passed where fc would be: not in the list */
2419             { if (Fop == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
2420           if (fc == *cp++)   /* Found in the list */
2421             { if (Fop == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
2422           }
2423         break;
2424
2425         case PT_UCNC:   /* $, @, `, 0xa0-0xd7ff, or 0xe000 and above (0xd800-0xdfff excluded) */
2426         if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
2427              fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
2428              fc >= 0xe000) == (Fop == OP_NOTPROP))
2429           RRETURN(MATCH_NOMATCH);
2430         break;
2431
2432         /* This should never occur */
2433
2434         default:
2435         return PCRE2_ERROR_INTERNAL;   /* Plain return, not RRETURN() */
2436         }
2437
2438       Fecode += 3;   /* Opcode, property type and property value */
2439       }
2440     break;
2441 
2442 
2443     /* ===================================================================== */
2444     /* Match an extended Unicode sequence. We will get here only if the support
2445     is in the binary; otherwise a compile-time error occurs. */
2446 
2447     case OP_EXTUNI:
2448     if (Feptr >= mb->end_subject)   /* Nothing left to match */
2449       {
2450       SCHECK_PARTIAL();
2451       RRETURN(MATCH_NOMATCH);
2452       }
2453     else
2454       {
2455       GETCHARINCTEST(fc, Feptr);   /* First character of the sequence */
2456       Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject, utf,
2457         NULL);   /* Result becomes the new subject position; presumably the end of the sequence - confirm */
2458       }
2459     CHECK_PARTIAL();   /* Reached only when a sequence was consumed */
2460     Fecode++;   /* Step past this opcode */
2461     break;
2462 
2463 #endif  /* SUPPORT_UNICODE */
2464 
2465 
2466     /* ===================================================================== */
2467     /* Match a single character type repeatedly. Note that the property type
2468     does not need to be in a stack frame as it is not used within an RMATCH()
2469     loop. */
2470 
2471 #define Lstart_eptr  F->temp_sptr[0]
2472 #define Lmin         F->temp_32[0]
2473 #define Lmax         F->temp_32[1]
2474 #define Lctype       F->temp_32[2]
2475 #define Lpropvalue   F->temp_32[3]
2476 
2477     case OP_TYPEEXACT:
2478     Lmin = Lmax = GET2(Fecode, 1);
2479     Fecode += 1 + IMM2_SIZE;
2480     goto REPEATTYPE;
2481 
2482     case OP_TYPEUPTO:
2483     case OP_TYPEMINUPTO:
2484     Lmin = 0;
2485     Lmax = GET2(Fecode, 1);
2486     reptype = (*Fecode == OP_TYPEMINUPTO)? REPTYPE_MIN : REPTYPE_MAX;
2487     Fecode += 1 + IMM2_SIZE;
2488     goto REPEATTYPE;
2489 
2490     case OP_TYPEPOSSTAR:
2491     reptype = REPTYPE_POS;
2492     Lmin = 0;
2493     Lmax = UINT32_MAX;
2494     Fecode++;
2495     goto REPEATTYPE;
2496 
2497     case OP_TYPEPOSPLUS:
2498     reptype = REPTYPE_POS;
2499     Lmin = 1;
2500     Lmax = UINT32_MAX;
2501     Fecode++;
2502     goto REPEATTYPE;
2503 
2504     case OP_TYPEPOSQUERY:
2505     reptype = REPTYPE_POS;
2506     Lmin = 0;
2507     Lmax = 1;
2508     Fecode++;
2509     goto REPEATTYPE;
2510 
2511     case OP_TYPEPOSUPTO:
2512     reptype = REPTYPE_POS;
2513     Lmin = 0;
2514     Lmax = GET2(Fecode, 1);
2515     Fecode += 1 + IMM2_SIZE;
2516     goto REPEATTYPE;
2517 
2518     case OP_TYPESTAR:
2519     case OP_TYPEMINSTAR:
2520     case OP_TYPEPLUS:
2521     case OP_TYPEMINPLUS:
2522     case OP_TYPEQUERY:
2523     case OP_TYPEMINQUERY:
2524     fc = *Fecode++ - OP_TYPESTAR;
2525     Lmin = rep_min[fc];
2526     Lmax = rep_max[fc];
2527     reptype = rep_typ[fc];
2528 
2529     /* Common code for all repeated character type matches. */
2530 
2531     REPEATTYPE:
2532     Lctype = *Fecode++;      /* Code for the character type */
2533 
2534 #ifdef SUPPORT_UNICODE
2535     if (Lctype == OP_PROP || Lctype == OP_NOTPROP)
2536       {
2537       proptype = *Fecode++;
2538       Lpropvalue = *Fecode++;
2539       }
2540     else proptype = -1;
2541 #endif
2542 
2543     /* First, ensure the minimum number of matches are present. Use inline
2544     code for maximizing the speed, and do the type test once at the start
2545     (i.e. keep it out of the loop). The code for UTF mode is separated out for
2546     tidiness, except for Unicode property tests. */
2547 
2548     if (Lmin > 0)
2549       {
2550 #ifdef SUPPORT_UNICODE
2551       if (proptype >= 0)  /* Property tests in all modes */
2552         {
2553         switch(proptype)
2554           {
2555           case PT_ANY:
2556           if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2557           for (i = 1; i <= Lmin; i++)
2558             {
2559             if (Feptr >= mb->end_subject)
2560               {
2561               SCHECK_PARTIAL();
2562               RRETURN(MATCH_NOMATCH);
2563               }
2564             GETCHARINCTEST(fc, Feptr);
2565             }
2566           break;
2567 
2568           case PT_LAMP:
2569           for (i = 1; i <= Lmin; i++)
2570             {
2571             int chartype;
2572             if (Feptr >= mb->end_subject)
2573               {
2574               SCHECK_PARTIAL();
2575               RRETURN(MATCH_NOMATCH);
2576               }
2577             GETCHARINCTEST(fc, Feptr);
2578             chartype = UCD_CHARTYPE(fc);
2579             if ((chartype == ucp_Lu ||
2580                  chartype == ucp_Ll ||
2581                  chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
2582               RRETURN(MATCH_NOMATCH);
2583             }
2584           break;
2585 
2586           case PT_GC:
2587           for (i = 1; i <= Lmin; i++)
2588             {
2589             if (Feptr >= mb->end_subject)
2590               {
2591               SCHECK_PARTIAL();
2592               RRETURN(MATCH_NOMATCH);
2593               }
2594             GETCHARINCTEST(fc, Feptr);
2595             if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
2596               RRETURN(MATCH_NOMATCH);
2597             }
2598           break;
2599 
2600           case PT_PC:
2601           for (i = 1; i <= Lmin; i++)
2602             {
2603             if (Feptr >= mb->end_subject)
2604               {
2605               SCHECK_PARTIAL();
2606               RRETURN(MATCH_NOMATCH);
2607               }
2608             GETCHARINCTEST(fc, Feptr);
2609             if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
2610               RRETURN(MATCH_NOMATCH);
2611             }
2612           break;
2613 
2614           case PT_SC:
2615           for (i = 1; i <= Lmin; i++)
2616             {
2617             if (Feptr >= mb->end_subject)
2618               {
2619               SCHECK_PARTIAL();
2620               RRETURN(MATCH_NOMATCH);
2621               }
2622             GETCHARINCTEST(fc, Feptr);
2623             if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
2624               RRETURN(MATCH_NOMATCH);
2625             }
2626           break;
2627 
2628           case PT_ALNUM:
2629           for (i = 1; i <= Lmin; i++)
2630             {
2631             int category;
2632             if (Feptr >= mb->end_subject)
2633               {
2634               SCHECK_PARTIAL();
2635               RRETURN(MATCH_NOMATCH);
2636               }
2637             GETCHARINCTEST(fc, Feptr);
2638             category = UCD_CATEGORY(fc);
2639             if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP))
2640               RRETURN(MATCH_NOMATCH);
2641             }
2642           break;
2643 
2644           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2645           which means that Perl space and POSIX space are now identical. PCRE
2646           was changed at release 8.34. */
2647 
2648           case PT_SPACE:    /* Perl space */
2649           case PT_PXSPACE:  /* POSIX space */
2650           for (i = 1; i <= Lmin; i++)
2651             {
2652             if (Feptr >= mb->end_subject)
2653               {
2654               SCHECK_PARTIAL();
2655               RRETURN(MATCH_NOMATCH);
2656               }
2657             GETCHARINCTEST(fc, Feptr);
2658             switch(fc)
2659               {
2660               HSPACE_CASES:
2661               VSPACE_CASES:
2662               if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2663               break;
2664 
2665               default:
2666               if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
2667                 RRETURN(MATCH_NOMATCH);
2668               break;
2669               }
2670             }
2671           break;
2672 
2673           case PT_WORD:
2674           for (i = 1; i <= Lmin; i++)
2675             {
2676             int category;
2677             if (Feptr >= mb->end_subject)
2678               {
2679               SCHECK_PARTIAL();
2680               RRETURN(MATCH_NOMATCH);
2681               }
2682             GETCHARINCTEST(fc, Feptr);
2683             category = UCD_CATEGORY(fc);
2684             if ((category == ucp_L || category == ucp_N ||
2685                 fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
2686               RRETURN(MATCH_NOMATCH);
2687             }
2688           break;
2689 
2690           case PT_CLIST:
2691           for (i = 1; i <= Lmin; i++)
2692             {
2693             const uint32_t *cp;
2694             if (Feptr >= mb->end_subject)
2695               {
2696               SCHECK_PARTIAL();
2697               RRETURN(MATCH_NOMATCH);
2698               }
2699             GETCHARINCTEST(fc, Feptr);
2700             cp = PRIV(ucd_caseless_sets) + Lpropvalue;
2701             for (;;)
2702               {
2703               if (fc < *cp)
2704                 {
2705                 if (Lctype == OP_NOTPROP) break;
2706                 RRETURN(MATCH_NOMATCH);
2707                 }
2708               if (fc == *cp++)
2709                 {
2710                 if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2711                 break;
2712                 }
2713               }
2714             }
2715           break;
2716 
2717           case PT_UCNC:
2718           for (i = 1; i <= Lmin; i++)
2719             {
2720             if (Feptr >= mb->end_subject)
2721               {
2722               SCHECK_PARTIAL();
2723               RRETURN(MATCH_NOMATCH);
2724               }
2725             GETCHARINCTEST(fc, Feptr);
2726             if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
2727                  fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
2728                  fc >= 0xe000) == (Lctype == OP_NOTPROP))
2729               RRETURN(MATCH_NOMATCH);
2730             }
2731           break;
2732 
2733           /* This should not occur */
2734 
2735           default:
2736           return PCRE2_ERROR_INTERNAL;
2737           }
2738         }
2739 
2740       /* Match extended Unicode sequences. We will get here only if the
2741       support is in the binary; otherwise a compile-time error occurs. */
2742 
2743       else if (Lctype == OP_EXTUNI)
2744         {
2745         for (i = 1; i <= Lmin; i++)
2746           {
2747           if (Feptr >= mb->end_subject)
2748             {
2749             SCHECK_PARTIAL();
2750             RRETURN(MATCH_NOMATCH);
2751             }
2752           else
2753             {
2754             GETCHARINCTEST(fc, Feptr);
2755             Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject,
2756               mb->end_subject, utf, NULL);
2757             }
2758           CHECK_PARTIAL();
2759           }
2760         }
2761       else
2762 #endif     /* SUPPORT_UNICODE */
2763 
2764 /* Handle all other cases in UTF mode */
2765 
2766 #ifdef SUPPORT_UNICODE
2767       if (utf) switch(Lctype)
2768         {
2769         case OP_ANY:
2770         for (i = 1; i <= Lmin; i++)
2771           {
2772           if (Feptr >= mb->end_subject)
2773             {
2774             SCHECK_PARTIAL();
2775             RRETURN(MATCH_NOMATCH);
2776             }
2777           if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
2778           if (mb->partial != 0 &&
2779               Feptr + 1 >= mb->end_subject &&
2780               NLBLOCK->nltype == NLTYPE_FIXED &&
2781               NLBLOCK->nllen == 2 &&
2782               UCHAR21(Feptr) == NLBLOCK->nl[0])
2783             {
2784             mb->hitend = TRUE;
2785             if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
2786             }
2787           Feptr++;
2788           ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
2789           }
2790         break;
2791 
2792         case OP_ALLANY:
2793         for (i = 1; i <= Lmin; i++)
2794           {
2795           if (Feptr >= mb->end_subject)
2796             {
2797             SCHECK_PARTIAL();
2798             RRETURN(MATCH_NOMATCH);
2799             }
2800           Feptr++;
2801           ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
2802           }
2803         break;
2804 
2805         case OP_ANYBYTE:
             /* Skip Lmin code units without decoding them. Compare Lmin with the
             number of code units remaining rather than computing
             "mb->end_subject - Lmin": when Lmin exceeds the distance from the
             start of the subject buffer to its end, that expression forms a
             pointer before the buffer, which is undefined behaviour in C. A
             negative difference (Feptr already past the end) still fails. */
2806         if (mb->end_subject - Feptr < (ptrdiff_t)Lmin) RRETURN(MATCH_NOMATCH);
2807         Feptr += Lmin;   /* Safe: at least Lmin code units remain */
2808         break;
2809 
2810         case OP_ANYNL:
2811         for (i = 1; i <= Lmin; i++)
2812           {
2813           if (Feptr >= mb->end_subject)
2814             {
2815             SCHECK_PARTIAL();
2816             RRETURN(MATCH_NOMATCH);
2817             }
2818           GETCHARINC(fc, Feptr);
2819           switch(fc)
2820             {
2821             default: RRETURN(MATCH_NOMATCH);
2822 
2823             case CHAR_CR:
2824             if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
2825             break;
2826 
2827             case CHAR_LF:
2828             break;
2829 
2830             case CHAR_VT:
2831             case CHAR_FF:
2832             case CHAR_NEL:
2833 #ifndef EBCDIC
2834             case 0x2028:
2835             case 0x2029:
2836 #endif  /* Not EBCDIC */
2837             if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
2838             break;
2839             }
2840           }
2841         break;
2842 
2843         case OP_NOT_HSPACE:
2844         for (i = 1; i <= Lmin; i++)
2845           {
2846           if (Feptr >= mb->end_subject)
2847             {
2848             SCHECK_PARTIAL();
2849             RRETURN(MATCH_NOMATCH);
2850             }
2851           GETCHARINC(fc, Feptr);
2852           switch(fc)
2853             {
2854             HSPACE_CASES: RRETURN(MATCH_NOMATCH);
2855             default: break;
2856             }
2857           }
2858         break;
2859 
2860         case OP_HSPACE:
2861         for (i = 1; i <= Lmin; i++)
2862           {
2863           if (Feptr >= mb->end_subject)
2864             {
2865             SCHECK_PARTIAL();
2866             RRETURN(MATCH_NOMATCH);
2867             }
2868           GETCHARINC(fc, Feptr);
2869           switch(fc)
2870             {
2871             HSPACE_CASES: break;
2872             default: RRETURN(MATCH_NOMATCH);
2873             }
2874           }
2875         break;
2876 
2877         case OP_NOT_VSPACE:
2878         for (i = 1; i <= Lmin; i++)
2879           {
2880           if (Feptr >= mb->end_subject)
2881             {
2882             SCHECK_PARTIAL();
2883             RRETURN(MATCH_NOMATCH);
2884             }
2885           GETCHARINC(fc, Feptr);
2886           switch(fc)
2887             {
2888             VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2889             default: break;
2890             }
2891           }
2892         break;
2893 
2894         case OP_VSPACE:
2895         for (i = 1; i <= Lmin; i++)
2896           {
2897           if (Feptr >= mb->end_subject)
2898             {
2899             SCHECK_PARTIAL();
2900             RRETURN(MATCH_NOMATCH);
2901             }
2902           GETCHARINC(fc, Feptr);
2903           switch(fc)
2904             {
2905             VSPACE_CASES: break;
2906             default: RRETURN(MATCH_NOMATCH);
2907             }
2908           }
2909         break;
2910 
2911         case OP_NOT_DIGIT:
2912         for (i = 1; i <= Lmin; i++)
2913           {
2914           if (Feptr >= mb->end_subject)
2915             {
2916             SCHECK_PARTIAL();
2917             RRETURN(MATCH_NOMATCH);
2918             }
2919           GETCHARINC(fc, Feptr);
2920           if (fc < 128 && (mb->ctypes[fc] & ctype_digit) != 0)
2921             RRETURN(MATCH_NOMATCH);
2922           }
2923         break;
2924 
2925         case OP_DIGIT:
2926         for (i = 1; i <= Lmin; i++)
2927           {
2928           uint32_t cc;
2929           if (Feptr >= mb->end_subject)
2930             {
2931             SCHECK_PARTIAL();
2932             RRETURN(MATCH_NOMATCH);
2933             }
2934           cc = UCHAR21(Feptr);
2935           if (cc >= 128 || (mb->ctypes[cc] & ctype_digit) == 0)
2936             RRETURN(MATCH_NOMATCH);
2937           Feptr++;
2938           /* No need to skip more code units - we know it has only one. */
2939           }
2940         break;
2941 
2942         case OP_NOT_WHITESPACE:
2943         for (i = 1; i <= Lmin; i++)
2944           {
2945           uint32_t cc;
2946           if (Feptr >= mb->end_subject)
2947             {
2948             SCHECK_PARTIAL();
2949             RRETURN(MATCH_NOMATCH);
2950             }
2951           cc = UCHAR21(Feptr);
2952           if (cc < 128 && (mb->ctypes[cc] & ctype_space) != 0)
2953             RRETURN(MATCH_NOMATCH);
2954           Feptr++;
2955           ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
2956           }
2957         break;
2958 
2959         case OP_WHITESPACE:
2960         for (i = 1; i <= Lmin; i++)
2961           {
2962           uint32_t cc;
2963           if (Feptr >= mb->end_subject)
2964             {
2965             SCHECK_PARTIAL();
2966             RRETURN(MATCH_NOMATCH);
2967             }
2968           cc = UCHAR21(Feptr);
2969           if (cc >= 128 || (mb->ctypes[cc] & ctype_space) == 0)
2970             RRETURN(MATCH_NOMATCH);
2971           Feptr++;
2972           /* No need to skip more code units - we know it has only one. */
2973           }
2974         break;
2975 
2976         case OP_NOT_WORDCHAR:
2977         for (i = 1; i <= Lmin; i++)
2978           {
2979           uint32_t cc;
2980           if (Feptr >= mb->end_subject)
2981             {
2982             SCHECK_PARTIAL();
2983             RRETURN(MATCH_NOMATCH);
2984             }
2985           cc = UCHAR21(Feptr);
2986           if (cc < 128 && (mb->ctypes[cc] & ctype_word) != 0)
2987             RRETURN(MATCH_NOMATCH);
2988           Feptr++;
2989           ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
2990           }
2991         break;
2992 
2993         case OP_WORDCHAR:
2994         for (i = 1; i <= Lmin; i++)
2995           {
2996           uint32_t cc;
2997           if (Feptr >= mb->end_subject)
2998             {
2999             SCHECK_PARTIAL();
3000             RRETURN(MATCH_NOMATCH);
3001             }
3002           cc = UCHAR21(Feptr);
3003           if (cc >= 128 || (mb->ctypes[cc] & ctype_word) == 0)
3004             RRETURN(MATCH_NOMATCH);
3005           Feptr++;
3006           /* No need to skip more code units - we know it has only one. */
3007           }
3008         break;
3009 
3010         default:
3011         return PCRE2_ERROR_INTERNAL;
3012         }  /* End switch(Lctype) */
3013 
3014       else
3015 #endif     /* SUPPORT_UNICODE */
3016 
3017       /* Code for the non-UTF case for minimum matching of operators other
3018       than OP_PROP and OP_NOTPROP. */
3019 
3020       switch(Lctype)
3021         {
3022         case OP_ANY:
3023         for (i = 1; i <= Lmin; i++)
3024           {
3025           if (Feptr >= mb->end_subject)
3026             {
3027             SCHECK_PARTIAL();
3028             RRETURN(MATCH_NOMATCH);
3029             }
3030           if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3031           if (mb->partial != 0 &&
3032               Feptr + 1 >= mb->end_subject &&
3033               NLBLOCK->nltype == NLTYPE_FIXED &&
3034               NLBLOCK->nllen == 2 &&
3035               *Feptr == NLBLOCK->nl[0])
3036             {
3037             mb->hitend = TRUE;
3038             if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3039             }
3040           Feptr++;
3041           }
3042         break;
3043 
3044         case OP_ALLANY:
3045         if (Feptr > mb->end_subject - Lmin)
3046           {
3047           SCHECK_PARTIAL();
3048           RRETURN(MATCH_NOMATCH);
3049           }
3050         Feptr += Lmin;
3051         break;
3052 
3053         /* This OP_ANYBYTE case will never be reached because \C gets turned
3054         into OP_ALLANY in non-UTF mode. Cut out the code so that coverage
3055         reports don't complain about its never being used. */
3056 
3057 /*        case OP_ANYBYTE:
3058 *        if (Feptr > mb->end_subject - Lmin)
3059 *          {
3060 *          SCHECK_PARTIAL();
3061 *          RRETURN(MATCH_NOMATCH);
3062 *          }
3063 *        Feptr += Lmin;
3064 *        break;
3065 */
3066         case OP_ANYNL:
3067         for (i = 1; i <= Lmin; i++)
3068           {
3069           if (Feptr >= mb->end_subject)
3070             {
3071             SCHECK_PARTIAL();
3072             RRETURN(MATCH_NOMATCH);
3073             }
3074           switch(*Feptr++)
3075             {
3076             default: RRETURN(MATCH_NOMATCH);
3077 
3078             case CHAR_CR:
3079             if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
3080             break;
3081 
3082             case CHAR_LF:
3083             break;
3084 
3085             case CHAR_VT:
3086             case CHAR_FF:
3087             case CHAR_NEL:
3088 #if PCRE2_CODE_UNIT_WIDTH != 8
3089             case 0x2028:
3090             case 0x2029:
3091 #endif
3092             if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
3093             break;
3094             }
3095           }
3096         break;
3097 
3098         case OP_NOT_HSPACE:
3099         for (i = 1; i <= Lmin; i++)
3100           {
3101           if (Feptr >= mb->end_subject)
3102             {
3103             SCHECK_PARTIAL();
3104             RRETURN(MATCH_NOMATCH);
3105             }
3106           switch(*Feptr++)
3107             {
3108             default: break;
3109             HSPACE_BYTE_CASES:
3110 #if PCRE2_CODE_UNIT_WIDTH != 8
3111             HSPACE_MULTIBYTE_CASES:
3112 #endif
3113             RRETURN(MATCH_NOMATCH);
3114             }
3115           }
3116         break;
3117 
3118         case OP_HSPACE:
3119         for (i = 1; i <= Lmin; i++)
3120           {
3121           if (Feptr >= mb->end_subject)
3122             {
3123             SCHECK_PARTIAL();
3124             RRETURN(MATCH_NOMATCH);
3125             }
3126           switch(*Feptr++)
3127             {
3128             default: RRETURN(MATCH_NOMATCH);
3129             HSPACE_BYTE_CASES:
3130 #if PCRE2_CODE_UNIT_WIDTH != 8
3131             HSPACE_MULTIBYTE_CASES:
3132 #endif
3133             break;
3134             }
3135           }
3136         break;
3137 
3138         case OP_NOT_VSPACE:
3139         for (i = 1; i <= Lmin; i++)
3140           {
3141           if (Feptr >= mb->end_subject)
3142             {
3143             SCHECK_PARTIAL();
3144             RRETURN(MATCH_NOMATCH);
3145             }
3146           switch(*Feptr++)
3147             {
3148             VSPACE_BYTE_CASES:
3149 #if PCRE2_CODE_UNIT_WIDTH != 8
3150             VSPACE_MULTIBYTE_CASES:
3151 #endif
3152             RRETURN(MATCH_NOMATCH);
3153             default: break;
3154             }
3155           }
3156         break;
3157 
3158         case OP_VSPACE:
3159         for (i = 1; i <= Lmin; i++)
3160           {
3161           if (Feptr >= mb->end_subject)
3162             {
3163             SCHECK_PARTIAL();
3164             RRETURN(MATCH_NOMATCH);
3165             }
3166           switch(*Feptr++)
3167             {
3168             default: RRETURN(MATCH_NOMATCH);
3169             VSPACE_BYTE_CASES:
3170 #if PCRE2_CODE_UNIT_WIDTH != 8
3171             VSPACE_MULTIBYTE_CASES:
3172 #endif
3173             break;
3174             }
3175           }
3176         break;
3177 
3178         case OP_NOT_DIGIT:
3179         for (i = 1; i <= Lmin; i++)
3180           {
3181           if (Feptr >= mb->end_subject)
3182             {
3183             SCHECK_PARTIAL();
3184             RRETURN(MATCH_NOMATCH);
3185             }
3186           if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
3187             RRETURN(MATCH_NOMATCH);
3188           Feptr++;
3189           }
3190         break;
3191 
3192         case OP_DIGIT:
3193         for (i = 1; i <= Lmin; i++)
3194           {
3195           if (Feptr >= mb->end_subject)
3196             {
3197             SCHECK_PARTIAL();
3198             RRETURN(MATCH_NOMATCH);
3199             }
3200           if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
3201             RRETURN(MATCH_NOMATCH);
3202           Feptr++;
3203           }
3204         break;
3205 
3206         case OP_NOT_WHITESPACE:
3207         for (i = 1; i <= Lmin; i++)
3208           {
3209           if (Feptr >= mb->end_subject)
3210             {
3211             SCHECK_PARTIAL();
3212             RRETURN(MATCH_NOMATCH);
3213             }
3214           if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
3215             RRETURN(MATCH_NOMATCH);
3216           Feptr++;
3217           }
3218         break;
3219 
3220         case OP_WHITESPACE:
3221         for (i = 1; i <= Lmin; i++)
3222           {
3223           if (Feptr >= mb->end_subject)
3224             {
3225             SCHECK_PARTIAL();
3226             RRETURN(MATCH_NOMATCH);
3227             }
3228           if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
3229             RRETURN(MATCH_NOMATCH);
3230           Feptr++;
3231           }
3232         break;
3233 
3234         case OP_NOT_WORDCHAR:
3235         for (i = 1; i <= Lmin; i++)
3236           {
3237           if (Feptr >= mb->end_subject)
3238             {
3239             SCHECK_PARTIAL();
3240             RRETURN(MATCH_NOMATCH);
3241             }
3242           if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
3243             RRETURN(MATCH_NOMATCH);
3244           Feptr++;
3245           }
3246         break;
3247 
3248         case OP_WORDCHAR:
3249         for (i = 1; i <= Lmin; i++)
3250           {
3251           if (Feptr >= mb->end_subject)
3252             {
3253             SCHECK_PARTIAL();
3254             RRETURN(MATCH_NOMATCH);
3255             }
3256           if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
3257             RRETURN(MATCH_NOMATCH);
3258           Feptr++;
3259           }
3260         break;
3261 
3262         default:
3263         return PCRE2_ERROR_INTERNAL;
3264         }
3265       }
3266 
3267     /* If Lmin = Lmax we are done. Continue with the main loop. */
3268 
3269     if (Lmin == Lmax) continue;
3270 
3271     /* If minimizing, we have to test the rest of the pattern before each
3272     subsequent match. */
3273 
3274     if (reptype == REPTYPE_MIN)
3275       {
3276 #ifdef SUPPORT_UNICODE
3277       if (proptype >= 0)
3278         {
3279         switch(proptype)
3280           {
3281           case PT_ANY:
3282           for (;;)
3283             {
3284             RMATCH(Fecode, RM208);
3285             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3286             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3287             if (Feptr >= mb->end_subject)
3288               {
3289               SCHECK_PARTIAL();
3290               RRETURN(MATCH_NOMATCH);
3291               }
3292             GETCHARINCTEST(fc, Feptr);
3293             if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3294             }
3295           /* Control never gets here */
3296 
3297           case PT_LAMP:
3298           for (;;)
3299             {
3300             int chartype;
3301             RMATCH(Fecode, RM209);
3302             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3303             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3304             if (Feptr >= mb->end_subject)
3305               {
3306               SCHECK_PARTIAL();
3307               RRETURN(MATCH_NOMATCH);
3308               }
3309             GETCHARINCTEST(fc, Feptr);
3310             chartype = UCD_CHARTYPE(fc);
3311             if ((chartype == ucp_Lu ||
3312                  chartype == ucp_Ll ||
3313                  chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
3314               RRETURN(MATCH_NOMATCH);
3315             }
3316           /* Control never gets here */
3317 
3318           case PT_GC:
3319           for (;;)
3320             {
3321             RMATCH(Fecode, RM210);
3322             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3323             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3324             if (Feptr >= mb->end_subject)
3325               {
3326               SCHECK_PARTIAL();
3327               RRETURN(MATCH_NOMATCH);
3328               }
3329             GETCHARINCTEST(fc, Feptr);
3330             if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3331               RRETURN(MATCH_NOMATCH);
3332             }
3333           /* Control never gets here */
3334 
3335           case PT_PC:
3336           for (;;)
3337             {
3338             RMATCH(Fecode, RM211);
3339             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3340             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3341             if (Feptr >= mb->end_subject)
3342               {
3343               SCHECK_PARTIAL();
3344               RRETURN(MATCH_NOMATCH);
3345               }
3346             GETCHARINCTEST(fc, Feptr);
3347             if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3348               RRETURN(MATCH_NOMATCH);
3349             }
3350           /* Control never gets here */
3351 
3352           case PT_SC:
3353           for (;;)
3354             {
3355             RMATCH(Fecode, RM212);
3356             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3357             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3358             if (Feptr >= mb->end_subject)
3359               {
3360               SCHECK_PARTIAL();
3361               RRETURN(MATCH_NOMATCH);
3362               }
3363             GETCHARINCTEST(fc, Feptr);
3364             if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3365               RRETURN(MATCH_NOMATCH);
3366             }
3367           /* Control never gets here */
3368 
3369           case PT_ALNUM:
3370           for (;;)
3371             {
3372             int category;
3373             RMATCH(Fecode, RM213);
3374             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3375             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3376             if (Feptr >= mb->end_subject)
3377               {
3378               SCHECK_PARTIAL();
3379               RRETURN(MATCH_NOMATCH);
3380               }
3381             GETCHARINCTEST(fc, Feptr);
3382             category = UCD_CATEGORY(fc);
3383             if ((category == ucp_L || category == ucp_N) ==
3384                 (Lctype == OP_NOTPROP))
3385               RRETURN(MATCH_NOMATCH);
3386             }
3387           /* Control never gets here */
3388 
3389           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
3390           which means that Perl space and POSIX space are now identical. PCRE
3391           was changed at release 8.34. */
3392 
3393           case PT_SPACE:    /* Perl space */
3394           case PT_PXSPACE:  /* POSIX space */
3395           for (;;)
3396             {
3397             RMATCH(Fecode, RM214);
3398             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3399             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3400             if (Feptr >= mb->end_subject)
3401               {
3402               SCHECK_PARTIAL();
3403               RRETURN(MATCH_NOMATCH);
3404               }
3405             GETCHARINCTEST(fc, Feptr);
3406             switch(fc)
3407               {
3408               HSPACE_CASES:
3409               VSPACE_CASES:
3410               if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3411               break;
3412 
3413               default:
3414               if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
3415                 RRETURN(MATCH_NOMATCH);
3416               break;
3417               }
3418             }
3419           /* Control never gets here */
3420 
3421           case PT_WORD:
3422           for (;;)
3423             {
3424             int category;
3425             RMATCH(Fecode, RM215);
3426             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3427             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3428             if (Feptr >= mb->end_subject)
3429               {
3430               SCHECK_PARTIAL();
3431               RRETURN(MATCH_NOMATCH);
3432               }
3433             GETCHARINCTEST(fc, Feptr);
3434             category = UCD_CATEGORY(fc);
3435             if ((category == ucp_L ||
3436                  category == ucp_N ||
3437                  fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
3438               RRETURN(MATCH_NOMATCH);
3439             }
3440           /* Control never gets here */
3441 
3442           case PT_CLIST:
3443           for (;;)
3444             {
3445             const uint32_t *cp;
3446             RMATCH(Fecode, RM216);
3447             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3448             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3449             if (Feptr >= mb->end_subject)
3450               {
3451               SCHECK_PARTIAL();
3452               RRETURN(MATCH_NOMATCH);
3453               }
3454             GETCHARINCTEST(fc, Feptr);
3455             cp = PRIV(ucd_caseless_sets) + Lpropvalue;
3456             for (;;)
3457               {
3458               if (fc < *cp)
3459                 {
3460                 if (Lctype == OP_NOTPROP) break;
3461                 RRETURN(MATCH_NOMATCH);
3462                 }
3463               if (fc == *cp++)
3464                 {
3465                 if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3466                 break;
3467                 }
3468               }
3469             }
3470           /* Control never gets here */
3471 
3472           case PT_UCNC:
3473           for (;;)
3474             {
3475             RMATCH(Fecode, RM217);
3476             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3477             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3478             if (Feptr >= mb->end_subject)
3479               {
3480               SCHECK_PARTIAL();
3481               RRETURN(MATCH_NOMATCH);
3482               }
3483             GETCHARINCTEST(fc, Feptr);
3484             if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
3485                  fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
3486                  fc >= 0xe000) == (Lctype == OP_NOTPROP))
3487               RRETURN(MATCH_NOMATCH);
3488             }
3489           /* Control never gets here */
3490 
3491           /* This should never occur */
3492           default:
3493           return PCRE2_ERROR_INTERNAL;
3494           }
3495         }
3496 
3497       /* Match extended Unicode sequences. We will get here only if the
3498       support is in the binary; otherwise a compile-time error occurs. */
3499 
3500       else if (Lctype == OP_EXTUNI)
3501         {
3502         for (;;)
3503           {
3504           RMATCH(Fecode, RM218);
3505           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3506           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3507           if (Feptr >= mb->end_subject)
3508             {
3509             SCHECK_PARTIAL();
3510             RRETURN(MATCH_NOMATCH);
3511             }
3512           else
3513             {
3514             GETCHARINCTEST(fc, Feptr);
3515             Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
3516               utf, NULL);
3517             }
3518           CHECK_PARTIAL();
3519           }
3520         }
3521       else
3522 #endif     /* SUPPORT_UNICODE */
3523 
3524       /* UTF mode for non-property testing character types. */
3525 
3526 #ifdef SUPPORT_UNICODE
3527       if (utf)
3528         {
3529         for (;;)
3530           {
3531           RMATCH(Fecode, RM219);
3532           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3533           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3534           if (Feptr >= mb->end_subject)
3535             {
3536             SCHECK_PARTIAL();
3537             RRETURN(MATCH_NOMATCH);
3538             }
3539           if (Lctype == OP_ANY && IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3540           GETCHARINC(fc, Feptr);
3541           switch(Lctype)
3542             {
3543             case OP_ANY:               /* This is the non-NL case */
3544             if (mb->partial != 0 &&    /* Take care with CRLF partial */
3545                 Feptr >= mb->end_subject &&
3546                 NLBLOCK->nltype == NLTYPE_FIXED &&
3547                 NLBLOCK->nllen == 2 &&
3548                 fc == NLBLOCK->nl[0])
3549               {
3550               mb->hitend = TRUE;
3551               if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3552               }
3553             break;
3554 
3555             case OP_ALLANY:
3556             case OP_ANYBYTE:
3557             break;
3558 
3559             case OP_ANYNL:
3560             switch(fc)
3561               {
3562               default: RRETURN(MATCH_NOMATCH);
3563 
3564               case CHAR_CR:
3565               if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
3566               break;
3567 
3568               case CHAR_LF:
3569               break;
3570 
3571               case CHAR_VT:
3572               case CHAR_FF:
3573               case CHAR_NEL:
3574 #ifndef EBCDIC
3575               case 0x2028:
3576               case 0x2029:
3577 #endif  /* Not EBCDIC */
3578               if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
3579                 RRETURN(MATCH_NOMATCH);
3580               break;
3581               }
3582             break;
3583 
3584             case OP_NOT_HSPACE:
3585             switch(fc)
3586               {
3587               HSPACE_CASES: RRETURN(MATCH_NOMATCH);
3588               default: break;
3589               }
3590             break;
3591 
3592             case OP_HSPACE:
3593             switch(fc)
3594               {
3595               HSPACE_CASES: break;
3596               default: RRETURN(MATCH_NOMATCH);
3597               }
3598             break;
3599 
3600             case OP_NOT_VSPACE:
3601             switch(fc)
3602               {
3603               VSPACE_CASES: RRETURN(MATCH_NOMATCH);
3604               default: break;
3605               }
3606             break;
3607 
3608             case OP_VSPACE:
3609             switch(fc)
3610               {
3611               VSPACE_CASES: break;
3612               default: RRETURN(MATCH_NOMATCH);
3613               }
3614             break;
3615 
3616             case OP_NOT_DIGIT:
3617             if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0)
3618               RRETURN(MATCH_NOMATCH);
3619             break;
3620 
3621             case OP_DIGIT:
3622             if (fc >= 256 || (mb->ctypes[fc] & ctype_digit) == 0)
3623               RRETURN(MATCH_NOMATCH);
3624             break;
3625 
3626             case OP_NOT_WHITESPACE:
3627             if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0)
3628               RRETURN(MATCH_NOMATCH);
3629             break;
3630 
3631             case OP_WHITESPACE:
3632             if (fc >= 256 || (mb->ctypes[fc] & ctype_space) == 0)
3633               RRETURN(MATCH_NOMATCH);
3634             break;
3635 
3636             case OP_NOT_WORDCHAR:
3637             if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0)
3638               RRETURN(MATCH_NOMATCH);
3639             break;
3640 
3641             case OP_WORDCHAR:
3642             if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0)
3643               RRETURN(MATCH_NOMATCH);
3644             break;
3645 
3646             default:
3647             return PCRE2_ERROR_INTERNAL;
3648             }
3649           }
3650         }
3651       else
3652 #endif  /* SUPPORT_UNICODE */
3653 
3654       /* Not UTF mode */
3655         {
3656         for (;;)
3657           {
3658           RMATCH(Fecode, RM33);
3659           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3660           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3661           if (Feptr >= mb->end_subject)
3662             {
3663             SCHECK_PARTIAL();
3664             RRETURN(MATCH_NOMATCH);
3665             }
3666           if (Lctype == OP_ANY && IS_NEWLINE(Feptr))
3667             RRETURN(MATCH_NOMATCH);
3668           fc = *Feptr++;
3669           switch(Lctype)
3670             {
3671             case OP_ANY:               /* This is the non-NL case */
3672             if (mb->partial != 0 &&    /* Take care with CRLF partial */
3673                 Feptr >= mb->end_subject &&
3674                 NLBLOCK->nltype == NLTYPE_FIXED &&
3675                 NLBLOCK->nllen == 2 &&
3676                 fc == NLBLOCK->nl[0])
3677               {
3678               mb->hitend = TRUE;
3679               if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3680               }
3681             break;
3682 
3683             case OP_ALLANY:
3684             case OP_ANYBYTE:
3685             break;
3686 
3687             case OP_ANYNL:
3688             switch(fc)
3689               {
3690               default: RRETURN(MATCH_NOMATCH);
3691 
3692               case CHAR_CR:
3693               if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
3694               break;
3695 
3696               case CHAR_LF:
3697               break;
3698 
3699               case CHAR_VT:
3700               case CHAR_FF:
3701               case CHAR_NEL:
3702 #if PCRE2_CODE_UNIT_WIDTH != 8
3703               case 0x2028:
3704               case 0x2029:
3705 #endif
3706               if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
3707                 RRETURN(MATCH_NOMATCH);
3708               break;
3709               }
3710             break;
3711 
3712             case OP_NOT_HSPACE:
3713             switch(fc)
3714               {
3715               default: break;
3716               HSPACE_BYTE_CASES:
3717 #if PCRE2_CODE_UNIT_WIDTH != 8
3718               HSPACE_MULTIBYTE_CASES:
3719 #endif
3720               RRETURN(MATCH_NOMATCH);
3721               }
3722             break;
3723 
3724             case OP_HSPACE:
3725             switch(fc)
3726               {
3727               default: RRETURN(MATCH_NOMATCH);
3728               HSPACE_BYTE_CASES:
3729 #if PCRE2_CODE_UNIT_WIDTH != 8
3730               HSPACE_MULTIBYTE_CASES:
3731 #endif
3732               break;
3733               }
3734             break;
3735 
3736             case OP_NOT_VSPACE:
3737             switch(fc)
3738               {
3739               default: break;
3740               VSPACE_BYTE_CASES:
3741 #if PCRE2_CODE_UNIT_WIDTH != 8
3742               VSPACE_MULTIBYTE_CASES:
3743 #endif
3744               RRETURN(MATCH_NOMATCH);
3745               }
3746             break;
3747 
3748             case OP_VSPACE:
3749             switch(fc)
3750               {
3751               default: RRETURN(MATCH_NOMATCH);
3752               VSPACE_BYTE_CASES:
3753 #if PCRE2_CODE_UNIT_WIDTH != 8
3754               VSPACE_MULTIBYTE_CASES:
3755 #endif
3756               break;
3757               }
3758             break;
3759 
3760             case OP_NOT_DIGIT:
3761             if (MAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
3762               RRETURN(MATCH_NOMATCH);
3763             break;
3764 
3765             case OP_DIGIT:
3766             if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
3767               RRETURN(MATCH_NOMATCH);
3768             break;
3769 
3770             case OP_NOT_WHITESPACE:
3771             if (MAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
3772               RRETURN(MATCH_NOMATCH);
3773             break;
3774 
3775             case OP_WHITESPACE:
3776             if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
3777               RRETURN(MATCH_NOMATCH);
3778             break;
3779 
3780             case OP_NOT_WORDCHAR:
3781             if (MAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
3782               RRETURN(MATCH_NOMATCH);
3783             break;
3784 
3785             case OP_WORDCHAR:
3786             if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
3787               RRETURN(MATCH_NOMATCH);
3788             break;
3789 
3790             default:
3791             return PCRE2_ERROR_INTERNAL;
3792             }
3793           }
3794         }
3795       /* Control never gets here */
3796       }
3797 
3798     /* If maximizing, it is worth using inline code for speed, doing the type
3799     test once at the start (i.e. keep it out of the loop). */
3800 
3801     else
3802       {
3803       Lstart_eptr = Feptr;  /* Remember where we started */
3804 
3805 #ifdef SUPPORT_UNICODE
3806       if (proptype >= 0)
3807         {
3808         switch(proptype)
3809           {
3810           case PT_ANY:
3811           for (i = Lmin; i < Lmax; i++)
3812             {
3813             int len = 1;
3814             if (Feptr >= mb->end_subject)
3815               {
3816               SCHECK_PARTIAL();
3817               break;
3818               }
3819             GETCHARLENTEST(fc, Feptr, len);
3820             if (Lctype == OP_NOTPROP) break;
3821             Feptr+= len;
3822             }
3823           break;
3824 
3825           case PT_LAMP:
3826           for (i = Lmin; i < Lmax; i++)
3827             {
3828             int chartype;
3829             int len = 1;
3830             if (Feptr >= mb->end_subject)
3831               {
3832               SCHECK_PARTIAL();
3833               break;
3834               }
3835             GETCHARLENTEST(fc, Feptr, len);
3836             chartype = UCD_CHARTYPE(fc);
3837             if ((chartype == ucp_Lu ||
3838                  chartype == ucp_Ll ||
3839                  chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
3840               break;
3841             Feptr+= len;
3842             }
3843           break;
3844 
3845           case PT_GC:
3846           for (i = Lmin; i < Lmax; i++)
3847             {
3848             int len = 1;
3849             if (Feptr >= mb->end_subject)
3850               {
3851               SCHECK_PARTIAL();
3852               break;
3853               }
3854             GETCHARLENTEST(fc, Feptr, len);
3855             if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3856               break;
3857             Feptr+= len;
3858             }
3859           break;
3860 
3861           case PT_PC:
3862           for (i = Lmin; i < Lmax; i++)
3863             {
3864             int len = 1;
3865             if (Feptr >= mb->end_subject)
3866               {
3867               SCHECK_PARTIAL();
3868               break;
3869               }
3870             GETCHARLENTEST(fc, Feptr, len);
3871             if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3872               break;
3873             Feptr+= len;
3874             }
3875           break;
3876 
3877           case PT_SC:
3878           for (i = Lmin; i < Lmax; i++)
3879             {
3880             int len = 1;
3881             if (Feptr >= mb->end_subject)
3882               {
3883               SCHECK_PARTIAL();
3884               break;
3885               }
3886             GETCHARLENTEST(fc, Feptr, len);
3887             if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3888               break;
3889             Feptr+= len;
3890             }
3891           break;
3892 
3893           case PT_ALNUM:
3894           for (i = Lmin; i < Lmax; i++)
3895             {
3896             int category;
3897             int len = 1;
3898             if (Feptr >= mb->end_subject)
3899               {
3900               SCHECK_PARTIAL();
3901               break;
3902               }
3903             GETCHARLENTEST(fc, Feptr, len);
3904             category = UCD_CATEGORY(fc);
3905             if ((category == ucp_L || category == ucp_N) ==
3906                 (Lctype == OP_NOTPROP))
3907               break;
3908             Feptr+= len;
3909             }
3910           break;
3911 
3912           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
3913           which means that Perl space and POSIX space are now identical. PCRE
3914           was changed at release 8.34. */
3915 
3916           case PT_SPACE:    /* Perl space */
3917           case PT_PXSPACE:  /* POSIX space */
3918           for (i = Lmin; i < Lmax; i++)
3919             {
3920             int len = 1;
3921             if (Feptr >= mb->end_subject)
3922               {
3923               SCHECK_PARTIAL();
3924               break;
3925               }
3926             GETCHARLENTEST(fc, Feptr, len);
3927             switch(fc)
3928               {
3929               HSPACE_CASES:
3930               VSPACE_CASES:
3931               if (Lctype == OP_NOTPROP) goto ENDLOOP99;  /* Break the loop */
3932               break;
3933 
3934               default:
3935               if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
3936                 goto ENDLOOP99;   /* Break the loop */
3937               break;
3938               }
3939             Feptr+= len;
3940             }
3941           ENDLOOP99:
3942           break;
3943 
3944           case PT_WORD:
3945           for (i = Lmin; i < Lmax; i++)
3946             {
3947             int category;
3948             int len = 1;
3949             if (Feptr >= mb->end_subject)
3950               {
3951               SCHECK_PARTIAL();
3952               break;
3953               }
3954             GETCHARLENTEST(fc, Feptr, len);
3955             category = UCD_CATEGORY(fc);
3956             if ((category == ucp_L || category == ucp_N ||
3957                  fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
3958               break;
3959             Feptr+= len;
3960             }
3961           break;
3962 
3963           case PT_CLIST:
3964           for (i = Lmin; i < Lmax; i++)
3965             {
3966             const uint32_t *cp;
3967             int len = 1;
3968             if (Feptr >= mb->end_subject)
3969               {
3970               SCHECK_PARTIAL();
3971               break;
3972               }
3973             GETCHARLENTEST(fc, Feptr, len);
3974             cp = PRIV(ucd_caseless_sets) + Lpropvalue;
3975             for (;;)
3976               {
3977               if (fc < *cp)
3978                 { if (Lctype == OP_NOTPROP) break; else goto GOT_MAX; }
3979               if (fc == *cp++)
3980                 { if (Lctype == OP_NOTPROP) goto GOT_MAX; else break; }
3981               }
3982             Feptr += len;
3983             }
3984           GOT_MAX:
3985           break;
3986 
3987           case PT_UCNC:
3988           for (i = Lmin; i < Lmax; i++)
3989             {
3990             int len = 1;
3991             if (Feptr >= mb->end_subject)
3992               {
3993               SCHECK_PARTIAL();
3994               break;
3995               }
3996             GETCHARLENTEST(fc, Feptr, len);
3997             if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
3998                  fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
3999                  fc >= 0xe000) == (Lctype == OP_NOTPROP))
4000               break;
4001             Feptr += len;
4002             }
4003           break;
4004 
4005           default:
4006           return PCRE2_ERROR_INTERNAL;
4007           }
4008 
4009         /* Feptr is now past the end of the maximum run */
4010 
4011         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4012 
4013         /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4014         Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
4015         go too far. */
4016 
4017         for(;;)
4018           {
4019           if (Feptr <= Lstart_eptr) break;
4020           RMATCH(Fecode, RM222);
4021           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4022           Feptr--;
4023           if (utf) BACKCHAR(Feptr);
4024           }
4025         }
4026 
4027       /* Match extended Unicode grapheme clusters. We will get here only if the
4028       support is in the binary; otherwise a compile-time error occurs. */
4029 
4030       else if (Lctype == OP_EXTUNI)
4031         {
4032         for (i = Lmin; i < Lmax; i++)
4033           {
4034           if (Feptr >= mb->end_subject)
4035             {
4036             SCHECK_PARTIAL();
4037             break;
4038             }
4039           else
4040             {
4041             GETCHARINCTEST(fc, Feptr);
4042             Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
4043               utf, NULL);
4044             }
4045           CHECK_PARTIAL();
4046           }
4047 
4048         /* Feptr is now past the end of the maximum run */
4049 
4050         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4051 
4052         /* We use <= Lstart_eptr rather than == Lstart_eptr to detect the start
4053         of the run while backtracking because the use of \C in UTF mode can
4054         cause BACKCHAR to move back past Lstart_eptr. This is just palliative;
4055         the use of \C in UTF mode is fraught with danger. */
4056 
4057         for(;;)
4058           {
4059           int lgb, rgb;
4060           PCRE2_SPTR fptr;
4061 
4062           if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4063           RMATCH(Fecode, RM220);
4064           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4065 
4066           /* Backtracking over an extended grapheme cluster involves inspecting
4067           the previous two characters (if present) to see if a break is
4068           permitted between them. */
4069 
4070           Feptr--;
4071           if (!utf) fc = *Feptr; else
4072             {
4073             BACKCHAR(Feptr);
4074             GETCHAR(fc, Feptr);
4075             }
4076           rgb = UCD_GRAPHBREAK(fc);
4077 
4078           for (;;)
4079             {
4080             if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4081             fptr = Feptr - 1;
4082             if (!utf) fc = *fptr; else
4083               {
4084               BACKCHAR(fptr);
4085               GETCHAR(fc, fptr);
4086               }
4087             lgb = UCD_GRAPHBREAK(fc);
4088             if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
4089             Feptr = fptr;
4090             rgb = lgb;
4091             }
4092           }
4093         }
4094 
4095       else
4096 #endif   /* SUPPORT_UNICODE */
4097 
4098 #ifdef SUPPORT_UNICODE
4099       if (utf)
4100         {
4101         switch(Lctype)
4102           {
4103           case OP_ANY:
4104           for (i = Lmin; i < Lmax; i++)
4105             {
4106             if (Feptr >= mb->end_subject)
4107               {
4108               SCHECK_PARTIAL();
4109               break;
4110               }
4111             if (IS_NEWLINE(Feptr)) break;
4112             if (mb->partial != 0 &&    /* Take care with CRLF partial */
4113                 Feptr + 1 >= mb->end_subject &&
4114                 NLBLOCK->nltype == NLTYPE_FIXED &&
4115                 NLBLOCK->nllen == 2 &&
4116                 UCHAR21(Feptr) == NLBLOCK->nl[0])
4117               {
4118               mb->hitend = TRUE;
4119               if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4120               }
4121             Feptr++;
4122             ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4123             }
4124           break;
4125 
4126           case OP_ALLANY:
4127           if (Lmax < UINT32_MAX)
4128             {
4129             for (i = Lmin; i < Lmax; i++)
4130               {
4131               if (Feptr >= mb->end_subject)
4132                 {
4133                 SCHECK_PARTIAL();
4134                 break;
4135                 }
4136               Feptr++;
4137               ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4138               }
4139             }
4140           else
4141             {
4142             Feptr = mb->end_subject;   /* Unlimited UTF-8 repeat */
4143             SCHECK_PARTIAL();
4144             }
4145           break;
4146 
4147           /* The "byte" (i.e. "code unit") case is the same as non-UTF */
4148 
4149           case OP_ANYBYTE:
4150           fc = Lmax - Lmin;
4151           if (fc > (uint32_t)(mb->end_subject - Feptr))
4152             {
4153             Feptr = mb->end_subject;
4154             SCHECK_PARTIAL();
4155             }
4156           else Feptr += fc;
4157           break;
4158 
4159           case OP_ANYNL:
4160           for (i = Lmin; i < Lmax; i++)
4161             {
4162             int len = 1;
4163             if (Feptr >= mb->end_subject)
4164               {
4165               SCHECK_PARTIAL();
4166               break;
4167               }
4168             GETCHARLEN(fc, Feptr, len);
4169             if (fc == CHAR_CR)
4170               {
4171               if (++Feptr >= mb->end_subject) break;
4172               if (UCHAR21(Feptr) == CHAR_LF) Feptr++;
4173               }
4174             else
4175               {
4176               if (fc != CHAR_LF &&
4177                   (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
4178                    (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
4179 #ifndef EBCDIC
4180                     && fc != 0x2028 && fc != 0x2029
4181 #endif  /* Not EBCDIC */
4182                     )))
4183                 break;
4184               Feptr += len;
4185               }
4186             }
4187           break;
4188 
4189           case OP_NOT_HSPACE:
4190           case OP_HSPACE:
4191           for (i = Lmin; i < Lmax; i++)
4192             {
4193             BOOL gotspace;
4194             int len = 1;
4195             if (Feptr >= mb->end_subject)
4196               {
4197               SCHECK_PARTIAL();
4198               break;
4199               }
4200             GETCHARLEN(fc, Feptr, len);
4201             switch(fc)
4202               {
4203               HSPACE_CASES: gotspace = TRUE; break;
4204               default: gotspace = FALSE; break;
4205               }
4206             if (gotspace == (Lctype == OP_NOT_HSPACE)) break;
4207             Feptr += len;
4208             }
4209           break;
4210 
4211           case OP_NOT_VSPACE:
4212           case OP_VSPACE:
4213           for (i = Lmin; i < Lmax; i++)
4214             {
4215             BOOL gotspace;
4216             int len = 1;
4217             if (Feptr >= mb->end_subject)
4218               {
4219               SCHECK_PARTIAL();
4220               break;
4221               }
4222             GETCHARLEN(fc, Feptr, len);
4223             switch(fc)
4224               {
4225               VSPACE_CASES: gotspace = TRUE; break;
4226               default: gotspace = FALSE; break;
4227               }
4228             if (gotspace == (Lctype == OP_NOT_VSPACE)) break;
4229             Feptr += len;
4230             }
4231           break;
4232 
4233           case OP_NOT_DIGIT:
4234           for (i = Lmin; i < Lmax; i++)
4235             {
4236             int len = 1;
4237             if (Feptr >= mb->end_subject)
4238               {
4239               SCHECK_PARTIAL();
4240               break;
4241               }
4242             GETCHARLEN(fc, Feptr, len);
4243             if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0) break;
4244             Feptr+= len;
4245             }
4246           break;
4247 
4248           case OP_DIGIT:
4249           for (i = Lmin; i < Lmax; i++)
4250             {
4251             int len = 1;
4252             if (Feptr >= mb->end_subject)
4253               {
4254               SCHECK_PARTIAL();
4255               break;
4256               }
4257             GETCHARLEN(fc, Feptr, len);
4258             if (fc >= 256 ||(mb->ctypes[fc] & ctype_digit) == 0) break;
4259             Feptr+= len;
4260             }
4261           break;
4262 
4263           case OP_NOT_WHITESPACE:
4264           for (i = Lmin; i < Lmax; i++)
4265             {
4266             int len = 1;
4267             if (Feptr >= mb->end_subject)
4268               {
4269               SCHECK_PARTIAL();
4270               break;
4271               }
4272             GETCHARLEN(fc, Feptr, len);
4273             if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0) break;
4274             Feptr+= len;
4275             }
4276           break;
4277 
4278           case OP_WHITESPACE:
4279           for (i = Lmin; i < Lmax; i++)
4280             {
4281             int len = 1;
4282             if (Feptr >= mb->end_subject)
4283               {
4284               SCHECK_PARTIAL();
4285               break;
4286               }
4287             GETCHARLEN(fc, Feptr, len);
4288             if (fc >= 256 ||(mb->ctypes[fc] & ctype_space) == 0) break;
4289             Feptr+= len;
4290             }
4291           break;
4292 
4293           case OP_NOT_WORDCHAR:
4294           for (i = Lmin; i < Lmax; i++)
4295             {
4296             int len = 1;
4297             if (Feptr >= mb->end_subject)
4298               {
4299               SCHECK_PARTIAL();
4300               break;
4301               }
4302             GETCHARLEN(fc, Feptr, len);
4303             if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0) break;
4304             Feptr+= len;
4305             }
4306           break;
4307 
4308           case OP_WORDCHAR:
4309           for (i = Lmin; i < Lmax; i++)
4310             {
4311             int len = 1;
4312             if (Feptr >= mb->end_subject)
4313               {
4314               SCHECK_PARTIAL();
4315               break;
4316               }
4317             GETCHARLEN(fc, Feptr, len);
4318             if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0) break;
4319             Feptr+= len;
4320             }
4321           break;
4322 
4323           default:
4324           return PCRE2_ERROR_INTERNAL;
4325           }
4326 
4327         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4328 
4329         /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4330         Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't go
4331         too far. */
4332 
4333         for(;;)
4334           {
4335           if (Feptr <= Lstart_eptr) break;
4336           RMATCH(Fecode, RM221);
4337           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4338           Feptr--;
4339           BACKCHAR(Feptr);
4340           if (Lctype == OP_ANYNL && Feptr > Lstart_eptr &&
4341               UCHAR21(Feptr) == CHAR_NL && UCHAR21(Feptr - 1) == CHAR_CR)
4342             Feptr--;
4343           }
4344         }
4345       else
4346 #endif  /* SUPPORT_UNICODE */
4347 
4348       /* Not UTF mode */
4349         {
4350         switch(Lctype)
4351           {
4352           case OP_ANY:
4353           for (i = Lmin; i < Lmax; i++)
4354             {
4355             if (Feptr >= mb->end_subject)
4356               {
4357               SCHECK_PARTIAL();
4358               break;
4359               }
4360             if (IS_NEWLINE(Feptr)) break;
4361             if (mb->partial != 0 &&    /* Take care with CRLF partial */
4362                 Feptr + 1 >= mb->end_subject &&
4363                 NLBLOCK->nltype == NLTYPE_FIXED &&
4364                 NLBLOCK->nllen == 2 &&
4365                 *Feptr == NLBLOCK->nl[0])
4366               {
4367               mb->hitend = TRUE;
4368               if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4369               }
4370             Feptr++;
4371             }
4372           break;
4373 
4374           case OP_ALLANY:
4375           case OP_ANYBYTE:
4376           fc = Lmax - Lmin;
4377           if (fc > (uint32_t)(mb->end_subject - Feptr))
4378             {
4379             Feptr = mb->end_subject;
4380             SCHECK_PARTIAL();
4381             }
4382           else Feptr += fc;
4383           break;
4384 
4385           case OP_ANYNL:
4386           for (i = Lmin; i < Lmax; i++)
4387             {
4388             if (Feptr >= mb->end_subject)
4389               {
4390               SCHECK_PARTIAL();
4391               break;
4392               }
4393             fc = *Feptr;
4394             if (fc == CHAR_CR)
4395               {
4396               if (++Feptr >= mb->end_subject) break;
4397               if (*Feptr == CHAR_LF) Feptr++;
4398               }
4399             else
4400               {
4401               if (fc != CHAR_LF && (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
4402                  (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
4403 #if PCRE2_CODE_UNIT_WIDTH != 8
4404                  && fc != 0x2028 && fc != 0x2029
4405 #endif
4406                  ))) break;
4407               Feptr++;
4408               }
4409             }
4410           break;
4411 
4412           case OP_NOT_HSPACE:
4413           for (i = Lmin; i < Lmax; i++)
4414             {
4415             if (Feptr >= mb->end_subject)
4416               {
4417               SCHECK_PARTIAL();
4418               break;
4419               }
4420             switch(*Feptr)
4421               {
4422               default: Feptr++; break;
4423               HSPACE_BYTE_CASES:
4424 #if PCRE2_CODE_UNIT_WIDTH != 8
4425               HSPACE_MULTIBYTE_CASES:
4426 #endif
4427               goto ENDLOOP00;
4428               }
4429             }
4430           ENDLOOP00:
4431           break;
4432 
4433           case OP_HSPACE:
4434           for (i = Lmin; i < Lmax; i++)
4435             {
4436             if (Feptr >= mb->end_subject)
4437               {
4438               SCHECK_PARTIAL();
4439               break;
4440               }
4441             switch(*Feptr)
4442               {
4443               default: goto ENDLOOP01;
4444               HSPACE_BYTE_CASES:
4445 #if PCRE2_CODE_UNIT_WIDTH != 8
4446               HSPACE_MULTIBYTE_CASES:
4447 #endif
4448               Feptr++; break;
4449               }
4450             }
4451           ENDLOOP01:
4452           break;
4453 
4454           case OP_NOT_VSPACE:
4455           for (i = Lmin; i < Lmax; i++)
4456             {
4457             if (Feptr >= mb->end_subject)
4458               {
4459               SCHECK_PARTIAL();
4460               break;
4461               }
4462             switch(*Feptr)
4463               {
4464               default: Feptr++; break;
4465               VSPACE_BYTE_CASES:
4466 #if PCRE2_CODE_UNIT_WIDTH != 8
4467               VSPACE_MULTIBYTE_CASES:
4468 #endif
4469               goto ENDLOOP02;
4470               }
4471             }
4472           ENDLOOP02:
4473           break;
4474 
4475           case OP_VSPACE:
4476           for (i = Lmin; i < Lmax; i++)
4477             {
4478             if (Feptr >= mb->end_subject)
4479               {
4480               SCHECK_PARTIAL();
4481               break;
4482               }
4483             switch(*Feptr)
4484               {
4485               default: goto ENDLOOP03;
4486               VSPACE_BYTE_CASES:
4487 #if PCRE2_CODE_UNIT_WIDTH != 8
4488               VSPACE_MULTIBYTE_CASES:
4489 #endif
4490               Feptr++; break;
4491               }
4492             }
4493           ENDLOOP03:
4494           break;
4495 
4496           case OP_NOT_DIGIT:
4497           for (i = Lmin; i < Lmax; i++)
4498             {
4499             if (Feptr >= mb->end_subject)
4500               {
4501               SCHECK_PARTIAL();
4502               break;
4503               }
4504             if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
4505               break;
4506             Feptr++;
4507             }
4508           break;
4509 
4510           case OP_DIGIT:
4511           for (i = Lmin; i < Lmax; i++)
4512             {
4513             if (Feptr >= mb->end_subject)
4514               {
4515               SCHECK_PARTIAL();
4516               break;
4517               }
4518             if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
4519               break;
4520             Feptr++;
4521             }
4522           break;
4523 
4524           case OP_NOT_WHITESPACE:
4525           for (i = Lmin; i < Lmax; i++)
4526             {
4527             if (Feptr >= mb->end_subject)
4528               {
4529               SCHECK_PARTIAL();
4530               break;
4531               }
4532             if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
4533               break;
4534             Feptr++;
4535             }
4536           break;
4537 
4538           case OP_WHITESPACE:
4539           for (i = Lmin; i < Lmax; i++)
4540             {
4541             if (Feptr >= mb->end_subject)
4542               {
4543               SCHECK_PARTIAL();
4544               break;
4545               }
4546             if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
4547               break;
4548             Feptr++;
4549             }
4550           break;
4551 
4552           case OP_NOT_WORDCHAR:
4553           for (i = Lmin; i < Lmax; i++)
4554             {
4555             if (Feptr >= mb->end_subject)
4556               {
4557               SCHECK_PARTIAL();
4558               break;
4559               }
4560             if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
4561               break;
4562             Feptr++;
4563             }
4564           break;
4565 
4566           case OP_WORDCHAR:
4567           for (i = Lmin; i < Lmax; i++)
4568             {
4569             if (Feptr >= mb->end_subject)
4570               {
4571               SCHECK_PARTIAL();
4572               break;
4573               }
4574             if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
4575               break;
4576             Feptr++;
4577             }
4578           break;
4579 
4580           default:
4581           return PCRE2_ERROR_INTERNAL;
4582           }
4583 
4584         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4585 
4586         for (;;)
4587           {
4588           if (Feptr == Lstart_eptr) break;
4589           RMATCH(Fecode, RM34);
4590           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4591           Feptr--;
4592           if (Lctype == OP_ANYNL && Feptr > Lstart_eptr && *Feptr == CHAR_LF &&
4593               Feptr[-1] == CHAR_CR) Feptr--;
4594           }
4595         }
4596       }
4597     break;  /* End of repeat character type processing */
4598 
4599 #undef Lstart_eptr
4600 #undef Lmin
4601 #undef Lmax
4602 #undef Lctype
4603 #undef Lpropvalue
4604 
4605 
4606     /* ===================================================================== */
4607     /* Match a back reference, possibly repeatedly. Look past the end of the
4608     item to see if there is repeat information following. The OP_REF and
4609     OP_REFI opcodes are used for a reference to a numbered group or to a
4610     non-duplicated named group. For a duplicated named group, OP_DNREF and
4611     OP_DNREFI are used. In this case we must scan the list of groups to which
4612     the name refers, and use the first one that is set. */
4613 
4614 #define Lmin      F->temp_32[0]
4615 #define Lmax      F->temp_32[1]
4616 #define Lcaseless F->temp_32[2]
4617 #define Lstart    F->temp_sptr[0]
4618 #define Loffset   F->temp_size
4619 
4620     case OP_DNREF:
4621     case OP_DNREFI:
4622     Lcaseless = (Fop == OP_DNREFI);
4623       {
4624       int count = GET2(Fecode, 1+IMM2_SIZE);
4625       PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
4626       Fecode += 1 + 2*IMM2_SIZE;
4627 
4628       while (count-- > 0)
4629         {
4630         Loffset = (GET2(slot, 0) << 1) - 2;
4631         if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET) break;
4632         slot += mb->name_entry_size;
4633         }
4634       }
4635     goto REF_REPEAT;
4636 
4637     case OP_REF:
4638     case OP_REFI:
4639     Lcaseless = (Fop == OP_REFI);
4640     Loffset = (GET2(Fecode, 1) << 1) - 2;
4641     Fecode += 1 + IMM2_SIZE;
4642 
4643     /* Set up for repetition, or handle the non-repeated case. The maximum and
4644     minimum must be in the heap frame, but as they are short-term values, we
4645     use temporary fields. */
4646 
4647     REF_REPEAT:
4648     switch (*Fecode)
4649       {
4650       case OP_CRSTAR:
4651       case OP_CRMINSTAR:
4652       case OP_CRPLUS:
4653       case OP_CRMINPLUS:
4654       case OP_CRQUERY:
4655       case OP_CRMINQUERY:
4656       fc = *Fecode++ - OP_CRSTAR;
4657       Lmin = rep_min[fc];
4658       Lmax = rep_max[fc];
4659       reptype = rep_typ[fc];
4660       break;
4661 
4662       case OP_CRRANGE:
4663       case OP_CRMINRANGE:
4664       Lmin = GET2(Fecode, 1);
4665       Lmax = GET2(Fecode, 1 + IMM2_SIZE);
4666       reptype = rep_typ[*Fecode - OP_CRSTAR];
4667       if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
4668       Fecode += 1 + 2 * IMM2_SIZE;
4669       break;
4670 
4671       default:                  /* No repeat follows */
4672         {
4673         rrc = match_ref(Loffset, Lcaseless, F, mb, &length);
4674         if (rrc != 0)
4675           {
4676           if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
4677           CHECK_PARTIAL();
4678           RRETURN(MATCH_NOMATCH);
4679           }
4680         }
4681       Feptr += length;
4682       continue;              /* With the main loop */
4683       }
4684 
4685     /* Handle repeated back references. If a set group has length zero, just
4686     continue with the main loop, because it matches however many times. For an
4687     unset reference, if the minimum is zero, we can also just continue. We can
4688     also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes an
4689     unset group behave as a zero-length group. For any other unset case,
4690     carrying on will result in NOMATCH. */
4691 
4692     if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET)
4693       {
4694       if (Fovector[Loffset] == Fovector[Loffset + 1]) continue;
4695       }
4696     else  /* Group is not set */
4697       {
4698       if (Lmin == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
4699         continue;
4700       }
4701 
4702     /* First, ensure the minimum number of matches are present. */
4703 
4704     for (i = 1; i <= Lmin; i++)
4705       {
4706       PCRE2_SIZE slength;
4707       rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
4708       if (rrc != 0)
4709         {
4710         if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
4711         CHECK_PARTIAL();
4712         RRETURN(MATCH_NOMATCH);
4713         }
4714       Feptr += slength;
4715       }
4716 
4717     /* If min = max, we are done. They are not both allowed to be zero. */
4718 
4719     if (Lmin == Lmax) continue;
4720 
4721     /* If minimizing, keep trying and advancing the pointer. */
4722 
4723     if (reptype == REPTYPE_MIN)
4724       {
4725       for (;;)
4726         {
4727         PCRE2_SIZE slength;
4728         RMATCH(Fecode, RM20);
4729         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4730         if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
4731         rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
4732         if (rrc != 0)
4733           {
4734           if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
4735           CHECK_PARTIAL();
4736           RRETURN(MATCH_NOMATCH);
4737           }
4738         Feptr += slength;
4739         }
4740       /* Control never gets here */
4741       }
4742 
4743     /* If maximizing, find the longest string and work backwards, as long as
4744     the matched lengths for each iteration are the same. */
4745 
4746     else
4747       {
4748       BOOL samelengths = TRUE;
4749       Lstart = Feptr;     /* Starting position */
4750       Flength = Fovector[Loffset+1] - Fovector[Loffset];
4751 
4752       for (i = Lmin; i < Lmax; i++)
4753         {
4754         PCRE2_SIZE slength;
4755         rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
4756         if (rrc != 0)
4757           {
4758           /* Can't use CHECK_PARTIAL because we don't want to update Feptr in
4759           the soft partial matching case. */
4760 
4761           if (rrc > 0 && mb->partial != 0 &&
4762               mb->end_subject > mb->start_used_ptr)
4763             {
4764             mb->hitend = TRUE;
4765             if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4766             }
4767           break;
4768           }
4769 
4770         if (slength != Flength) samelengths = FALSE;
4771         Feptr += slength;
4772         }
4773 
4774       /* If the length matched for each repetition is the same as the length of
4775       the captured group, we can easily work backwards. This is the normal
4776       case. However, in caseless UTF-8 mode there are pairs of case-equivalent
4777       characters whose lengths (in terms of code units) differ. Because this
4778       is very rare, we handle it by re-matching fewer and fewer times. */
4779 
4780       if (samelengths)
4781         {
4782         while (Feptr >= Lstart)
4783           {
4784           RMATCH(Fecode, RM21);
4785           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4786           Feptr -= Flength;
4787           }
4788         }
4789 
4790       /* The rare case of non-matching lengths. Re-scan the repetition for each
4791       iteration. We know that match_ref() will succeed every time. */
4792 
4793       else
4794         {
4795         Lmax = i;
4796         for (;;)
4797           {
4798           RMATCH(Fecode, RM22);
4799           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4800           if (Feptr == Lstart) break; /* Failed after minimal repetition */
4801           Feptr = Lstart;
4802           Lmax--;
4803           for (i = Lmin; i < Lmax; i++)
4804             {
4805             PCRE2_SIZE slength;
4806             (void)match_ref(Loffset, Lcaseless, F, mb, &slength);
4807             Feptr += slength;
4808             }
4809           }
4810         }
4811 
4812       RRETURN(MATCH_NOMATCH);
4813       }
4814     /* Control never gets here */
4815 
4816 #undef Lcaseless
4817 #undef Lmin
4818 #undef Lmax
4819 #undef Lstart
4820 #undef Loffset
4821 
4822 
4823 
4824 /* ========================================================================= */
4825 /*           Opcodes for the start of various parenthesized items            */
4826 /* ========================================================================= */
4827 
4828     /* In all cases, if the result of RMATCH() is MATCH_THEN, check whether the
4829     (*THEN) is within the current branch by comparing the address of OP_THEN
4830     that is passed back with the end of the branch. If (*THEN) is within the
4831     current branch, and the branch is one of two or more alternatives (it
4832     either starts or ends with OP_ALT), we have reached the limit of THEN's
4833     action, so convert the return code to NOMATCH, which will cause normal
4834     backtracking to happen from now on. Otherwise, THEN is passed back to an
4835     outer alternative. This implements Perl's treatment of parenthesized
4836     groups, where a group not containing | does not affect the current
4837     alternative, that is, (X) is NOT the same as (X|(*F)). */
4838 
4839 
4840     /* ===================================================================== */
4841     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a non-possessive
4842     bracket group, indicating that it may occur zero times. It may repeat
4843     infinitely, or not at all - i.e. it could be ()* or ()? or even (){0} in
4844     the pattern. Brackets with fixed upper repeat limits are compiled as a
4845     number of copies, with the optional ones preceded by BRAZERO or BRAMINZERO.
4846     Possessive groups with possible zero repeats are preceded by BRAPOSZERO. */
4847 
4848 #define Lnext_ecode F->temp_sptr[0]
4849 
4850     case OP_BRAZERO:
4851     Lnext_ecode = Fecode + 1;
4852     RMATCH(Lnext_ecode, RM9);
4853     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4854     do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
4855     Fecode = Lnext_ecode + 1 + LINK_SIZE;
4856     break;
4857 
4858     case OP_BRAMINZERO:
4859     Lnext_ecode = Fecode + 1;
4860     do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
4861     RMATCH(Lnext_ecode + 1 + LINK_SIZE, RM10);
4862     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4863     Fecode++;
4864     break;
4865 
4866 #undef Lnext_ecode
4867 
4868     case OP_SKIPZERO:
4869     Fecode++;
4870     do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
4871     Fecode += 1 + LINK_SIZE;
4872     break;
4873 
4874 
4875     /* ===================================================================== */
4876     /* Handle possessive brackets with an unlimited repeat. The end of these
4877     brackets will always be OP_KETRPOS, which returns MATCH_KETRPOS without
4878     going further in the pattern. */
4879 
4880 #define Lframe_type    F->temp_32[0]
4881 #define Lmatched_once  F->temp_32[1]
4882 #define Lzero_allowed  F->temp_32[2]
4883 #define Lstart_eptr    F->temp_sptr[0]
4884 #define Lstart_group   F->temp_sptr[1]
4885 
4886     case OP_BRAPOSZERO:
4887     Lzero_allowed = TRUE;                /* Zero repeat is allowed */
4888     Fecode += 1;
4889     if (*Fecode == OP_CBRAPOS || *Fecode == OP_SCBRAPOS)
4890       goto POSSESSIVE_CAPTURE;
4891     goto POSSESSIVE_NON_CAPTURE;
4892 
4893     case OP_BRAPOS:
4894     case OP_SBRAPOS:
4895     Lzero_allowed = FALSE;               /* Zero repeat not allowed */
4896 
4897     POSSESSIVE_NON_CAPTURE:
4898     Lframe_type = GF_NOCAPTURE;          /* Remembered frame type */
4899     goto POSSESSIVE_GROUP;
4900 
4901     case OP_CBRAPOS:
4902     case OP_SCBRAPOS:
4903     Lzero_allowed = FALSE;               /* Zero repeat not allowed */
4904 
4905     POSSESSIVE_CAPTURE:
4906     number = GET2(Fecode, 1+LINK_SIZE);
4907     Lframe_type = GF_CAPTURE | number;   /* Remembered frame type */
4908 
4909     POSSESSIVE_GROUP:
4910     Lmatched_once = FALSE;               /* Never matched */
4911     Lstart_group = Fecode;               /* Start of this group */
4912 
4913     for (;;)
4914       {
4915       Lstart_eptr = Feptr;               /* Position at group start */
4916       group_frame_type = Lframe_type;
4917       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM8);
4918       if (rrc == MATCH_KETRPOS)
4919         {
4920         Lmatched_once = TRUE;            /* Matched at least once */
4921         if (Feptr == Lstart_eptr)        /* Empty match; skip to end */
4922           {
4923           do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
4924           break;
4925           }
4926 
4927         Fecode = Lstart_group;
4928         continue;
4929         }
4930 
4931       /* See comment above about handling THEN. */
4932 
4933       if (rrc == MATCH_THEN)
4934         {
4935         PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
4936         if (mb->verb_ecode_ptr < next_ecode &&
4937             (*Fecode == OP_ALT || *next_ecode == OP_ALT))
4938           rrc = MATCH_NOMATCH;
4939         }
4940 
4941       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4942       Fecode += GET(Fecode, 1);
4943       if (*Fecode != OP_ALT) break;
4944       }
4945 
4946     /* Success if matched something or zero repeat allowed */
4947 
4948     if (Lmatched_once || Lzero_allowed)
4949       {
4950       Fecode += 1 + LINK_SIZE;
4951       break;
4952       }
4953 
4954     RRETURN(MATCH_NOMATCH);
4955 
4956 #undef Lmatched_once
4957 #undef Lzero_allowed
4958 #undef Lframe_type
4959 #undef Lstart_eptr
4960 #undef Lstart_group
4961 
4962 
4963     /* ===================================================================== */
4964     /* Handle non-capturing brackets that cannot match an empty string. When we
4965     get to the final alternative within the brackets, as long as there are no
4966     THEN's in the pattern, we can optimize by not recording a new backtracking
4967     point. (Ideally we should test for a THEN within this group, but we don't
4968     have that information.) Don't do this if we are at the very top level,
4969     however, because that would make handling assertions and once-only brackets
4970     messier when there is nothing to go back to. */
4971 
4972 #define Lframe_type F->temp_32[0]     /* Set for all that use GROUPLOOP */
4973 #define Lnext_branch F->temp_sptr[0]  /* Used only in OP_BRA handling */
4974 
4975     case OP_BRA:
4976     if (mb->hasthen || Frdepth == 0)
4977       {
4978       Lframe_type = 0;
4979       goto GROUPLOOP;
4980       }
4981 
4982     for (;;)
4983       {
4984       Lnext_branch = Fecode + GET(Fecode, 1);
4985       if (*Lnext_branch != OP_ALT) break;
4986 
4987       /* This is never the final branch. We do not need to test for MATCH_THEN
4988       here because this code is not used when there is a THEN in the pattern. */
4989 
4990       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM1);
4991       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4992       Fecode = Lnext_branch;
4993       }
4994 
4995     /* Hit the start of the final branch. Continue at this level. */
4996 
4997     Fecode += PRIV(OP_lengths)[*Fecode];
4998     break;
4999 
5000 #undef Lnext_branch
5001 
5002 
5003     /* ===================================================================== */
5004     /* Handle a capturing bracket, other than those that are possessive with an
5005     unlimited repeat. */
5006 
5007     case OP_CBRA:
5008     case OP_SCBRA:
5009     Lframe_type = GF_CAPTURE | GET2(Fecode, 1+LINK_SIZE);
5010     goto GROUPLOOP;
5011 
5012 
5013     /* ===================================================================== */
5014     /* Atomic groups and non-capturing brackets that can match an empty string
5015     must record a backtracking point and also set up a chained frame. */
5016 
5017     case OP_ONCE:
5018     case OP_SCRIPT_RUN:
5019     case OP_SBRA:
5020     Lframe_type = GF_NOCAPTURE | Fop;
5021 
5022     GROUPLOOP:
5023     for (;;)
5024       {
5025       group_frame_type = Lframe_type;
5026       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM2);
5027       if (rrc == MATCH_THEN)
5028         {
5029         PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
5030         if (mb->verb_ecode_ptr < next_ecode &&
5031             (*Fecode == OP_ALT || *next_ecode == OP_ALT))
5032           rrc = MATCH_NOMATCH;
5033         }
5034       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5035       Fecode += GET(Fecode, 1);
5036       if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5037       }
5038     /* Control never reaches here. */
5039 
5040 #undef Lframe_type
5041 
5042 
5043     /* ===================================================================== */
5044     /* Recursion either matches the current regex, or some subexpression. The
5045     offset data is the offset to the starting bracket from the start of the
5046     whole pattern. (This is so that it works from duplicated subpatterns.) */
5047 
5048 #define Lframe_type F->temp_32[0]
5049 #define Lstart_branch F->temp_sptr[0]
5050 
5051     case OP_RECURSE:
5052     bracode = mb->start_code + GET(Fecode, 1);
5053     number = (bracode == mb->start_code)? 0 : GET2(bracode, 1 + LINK_SIZE);
5054 
5055     /* If we are already in a recursion, check for repeating the same one
5056     without advancing the subject pointer. This should catch convoluted mutual
5057     recursions. (Some simple cases are caught at compile time.) */
5058 
5059     if (Fcurrent_recurse != RECURSE_UNSET)
5060       {
5061       offset = Flast_group_offset;
5062       while (offset != PCRE2_UNSET)
5063         {
5064         N = (heapframe *)((char *)mb->match_frames + offset);
5065         P = (heapframe *)((char *)N - frame_size);
5066         if (N->group_frame_type == (GF_RECURSE | number))
5067           {
5068           if (Feptr == P->eptr) return PCRE2_ERROR_RECURSELOOP;
5069           break;
5070           }
5071         offset = P->last_group_offset;
5072         }
5073       }
5074 
5075     /* Now run the recursion, branch by branch. */
5076 
5077     Lstart_branch = bracode;
5078     Lframe_type = GF_RECURSE | number;
5079 
5080     for (;;)
5081       {
5082       PCRE2_SPTR next_ecode;
5083 
5084       group_frame_type = Lframe_type;
5085       RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM11);
5086       next_ecode = Lstart_branch + GET(Lstart_branch,1);
5087 
5088       /* Handle backtracking verbs, which are defined in a range that can
5089       easily be tested for. PCRE does not allow THEN, SKIP, PRUNE or COMMIT to
5090       escape beyond a recursion; they cause a NOMATCH for the entire recursion.
5091 
5092       When one of these verbs triggers, the current recursion group number is
5093       recorded. If it matches the recursion we are processing, the verb
5094       happened within the recursion and we must deal with it. Otherwise it must
5095       have happened after the recursion completed, and so has to be passed
5096       back. See comment above about handling THEN. */
5097 
5098       if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX &&
5099           mb->verb_current_recurse == (Lframe_type ^ GF_RECURSE))
5100         {
5101         if (rrc == MATCH_THEN && mb->verb_ecode_ptr < next_ecode &&
5102             (*Lstart_branch == OP_ALT || *next_ecode == OP_ALT))
5103           rrc = MATCH_NOMATCH;
5104         else RRETURN(MATCH_NOMATCH);
5105         }
5106 
5107       /* Note that carrying on after (*ACCEPT) in a recursion is handled in the
5108       OP_ACCEPT code. Nothing needs to be done here. */
5109 
5110       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5111       Lstart_branch = next_ecode;
5112       if (*Lstart_branch != OP_ALT) RRETURN(MATCH_NOMATCH);
5113       }
5114     /* Control never reaches here. */
5115 
5116 #undef Lframe_type
5117 #undef Lstart_branch
5118 
5119 
5120     /* ===================================================================== */
5121     /* Positive assertions are like other groups except that PCRE doesn't allow
5122     the effect of (*THEN) to escape beyond an assertion; it is therefore
5123     treated as NOMATCH. (*ACCEPT) is treated as a successful assertion, with
5124     its captures and mark retained. Any other return is an error. */
5125 
5126 #define Lframe_type  F->temp_32[0]
5127 
5128     case OP_ASSERT:
5129     case OP_ASSERTBACK:
5130     Lframe_type = GF_NOCAPTURE | Fop;
5131     for (;;)
5132       {
5133       group_frame_type = Lframe_type;
5134       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM3);
5135       if (rrc == MATCH_ACCEPT)
5136         {
5137         memcpy(Fovector,
5138               (char *)assert_accept_frame + offsetof(heapframe, ovector),
5139               assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5140         Foffset_top = assert_accept_frame->offset_top;
5141         Fmark = assert_accept_frame->mark;
5142         break;
5143         }
5144       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
5145       Fecode += GET(Fecode, 1);
5146       if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5147       }
5148 
5149     do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5150     Fecode += 1 + LINK_SIZE;
5151     break;
5152 
5153 #undef Lframe_type
5154 
5155 
5156     /* ===================================================================== */
5157     /* Handle negative assertions. Loop for each non-matching branch as for
5158     positive assertions. */
5159 
5160 #define Lframe_type  F->temp_32[0]
5161 
5162     case OP_ASSERT_NOT:
5163     case OP_ASSERTBACK_NOT:
5164     Lframe_type  = GF_NOCAPTURE | Fop;
5165 
5166     for (;;)
5167       {
5168       group_frame_type = Lframe_type;
5169       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM4);
5170       switch(rrc)
5171         {
5172         case MATCH_ACCEPT:   /* Assertion matched, therefore it fails. */
5173         case MATCH_MATCH:
5174         RRETURN (MATCH_NOMATCH);
5175 
5176         case MATCH_NOMATCH:  /* Branch failed, try next if present. */
5177         case MATCH_THEN:
5178         Fecode += GET(Fecode, 1);
5179         if (*Fecode != OP_ALT) goto ASSERT_NOT_FAILED;
5180         break;
5181 
5182         case MATCH_COMMIT:   /* Assertion forced to fail, therefore continue. */
5183         case MATCH_SKIP:
5184         case MATCH_PRUNE:
5185         do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5186         goto ASSERT_NOT_FAILED;
5187 
5188         default:             /* Pass back any other return */
5189         RRETURN(rrc);
5190         }
5191       }
5192 
5193     /* None of the branches have matched or there was a backtrack to (*COMMIT),
5194     (*SKIP), (*PRUNE), or (*THEN) in the last branch. This is success for a
5195     negative assertion, so carry on. */
5196 
5197     ASSERT_NOT_FAILED:
5198     Fecode += 1 + LINK_SIZE;
5199     break;
5200 
5201 #undef Lframe_type
5202 
5203 
5204     /* ===================================================================== */
5205     /* The callout item calls an external function, if one is provided, passing
5206     details of the match so far. This is mainly for debugging, though the
5207     function is able to force a failure. */
5208 
5209     case OP_CALLOUT:
5210     case OP_CALLOUT_STR:
5211     rrc = do_callout(F, mb, &length);
5212     if (rrc > 0) RRETURN(MATCH_NOMATCH);
5213     if (rrc < 0) RRETURN(rrc);
5214     Fecode += length;
5215     break;
5216 
5217 
5218     /* ===================================================================== */
5219     /* Conditional group: compilation checked that there are no more than two
5220     branches. If the condition is false, skipping the first branch takes us
5221     past the end of the item if there is only one branch, but that's exactly
5222     what we want. */
5223 
5224     case OP_COND:
5225     case OP_SCOND:
5226 
5227     /* The variable Flength will be added to Fecode when the condition is
5228     false, to get to the second branch. Setting it to the offset to the ALT or
5229     KET, then incrementing Fecode achieves this effect. However, if the second
5230     branch is non-existent, we must point to the KET so that the end of the
5231     group is correctly processed. We now have Fecode pointing to the condition
5232     or callout. */
5233 
5234     Flength = GET(Fecode, 1);    /* Offset to the second branch */
5235     if (Fecode[Flength] != OP_ALT) Flength -= 1 + LINK_SIZE;
5236     Fecode += 1 + LINK_SIZE;     /* From this opcode */
5237 
5238     /* Because of the way auto-callout works during compile, a callout item is
5239     inserted between OP_COND and an assertion condition. Such a callout can
5240     also be inserted manually. */
5241 
5242     if (*Fecode == OP_CALLOUT || *Fecode == OP_CALLOUT_STR)
5243       {
5244       rrc = do_callout(F, mb, &length);
5245       if (rrc > 0) RRETURN(MATCH_NOMATCH);
5246       if (rrc < 0) RRETURN(rrc);
5247 
5248       /* Advance Fecode past the callout, so it now points to the condition. We
5249       must adjust Flength so that the value of Fecode+Flength is unchanged. */
5250 
5251       Fecode += length;
5252       Flength -= length;
5253       }
5254 
5255     /* Test the various possible conditions */
5256 
5257     condition = FALSE;
5258     switch(*Fecode)
5259       {
5260       case OP_RREF:                  /* Group recursion test */
5261       if (Fcurrent_recurse != RECURSE_UNSET)
5262         {
5263         number = GET2(Fecode, 1);
5264         condition = (number == RREF_ANY || number == Fcurrent_recurse);
5265         }
5266       break;
5267 
5268       case OP_DNRREF:       /* Duplicate named group recursion test */
5269       if (Fcurrent_recurse != RECURSE_UNSET)
5270         {
5271         int count = GET2(Fecode, 1 + IMM2_SIZE);
5272         PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5273         while (count-- > 0)
5274           {
5275           number = GET2(slot, 0);
5276           condition = number == Fcurrent_recurse;
5277           if (condition) break;
5278           slot += mb->name_entry_size;
5279           }
5280         }
5281       break;
5282 
5283       case OP_CREF:                         /* Numbered group used test */
5284       offset = (GET2(Fecode, 1) << 1) - 2;  /* Doubled ref number */
5285       condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
5286       break;
5287 
5288       case OP_DNCREF:      /* Duplicate named group used test */
5289         {
5290         int count = GET2(Fecode, 1 + IMM2_SIZE);
5291         PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5292         while (count-- > 0)
5293           {
5294           offset = (GET2(slot, 0) << 1) - 2;
5295           condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
5296           if (condition) break;
5297           slot += mb->name_entry_size;
5298           }
5299         }
5300       break;
5301 
5302       case OP_FALSE:
5303       case OP_FAIL:   /* The assertion (?!) becomes OP_FAIL */
5304       break;
5305 
5306       case OP_TRUE:
5307       condition = TRUE;
5308       break;
5309 
5310       /* The condition is an assertion. Run code similar to the assertion code
5311       above. */
5312 
5313 #define Lpositive      F->temp_32[0]
5314 #define Lstart_branch  F->temp_sptr[0]
5315 
5316       default:
5317       Lpositive = (*Fecode == OP_ASSERT || *Fecode == OP_ASSERTBACK);
5318       Lstart_branch = Fecode;
5319 
5320       for (;;)
5321         {
5322         group_frame_type = GF_CONDASSERT | *Fecode;
5323         RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM5);
5324 
5325         switch(rrc)
5326           {
5327           case MATCH_ACCEPT:  /* Save captures */
5328           memcpy(Fovector,
5329                 (char *)assert_accept_frame + offsetof(heapframe, ovector),
5330                 assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5331           Foffset_top = assert_accept_frame->offset_top;
5332 
5333           /* Fall through */
5334           /* In the case of a match, the captures have already been put into
5335           the current frame. */
5336 
5337           case MATCH_MATCH:
5338           condition = Lpositive;   /* TRUE for positive assertion */
5339           break;
5340 
5341           /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
5342           assertion; it is therefore always treated as NOMATCH. */
5343 
5344           case MATCH_NOMATCH:
5345           case MATCH_THEN:
5346           Lstart_branch += GET(Lstart_branch, 1);
5347           if (*Lstart_branch == OP_ALT) continue;  /* Try next branch */
5348           condition = !Lpositive;  /* TRUE for negative assertion */
5349           break;
5350 
5351           /* These force no match without checking other branches. */
5352 
5353           case MATCH_COMMIT:
5354           case MATCH_SKIP:
5355           case MATCH_PRUNE:
5356           condition = !Lpositive;
5357           break;
5358 
5359           default:
5360           RRETURN(rrc);
5361           }
5362         break;  /* Out of the branch loop */
5363         }
5364 
5365       /* If the condition is true, find the end of the assertion so that
5366       advancing past it gets us to the start of the first branch. */
5367 
5368       if (condition)
5369         {
5370         do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5371         }
5372       break;  /* End of assertion condition */
5373       }
5374 
5375 #undef Lpositive
5376 #undef Lstart_branch
5377 
5378     /* Choose branch according to the condition. */
5379 
5380     Fecode += condition? PRIV(OP_lengths)[*Fecode] : Flength;
5381 
5382     /* If the opcode is OP_SCOND it means we are at a repeated conditional
5383     group that might match an empty string. We must therefore descend a level
5384     so that the start is remembered for checking. For OP_COND we can just
5385     continue at this level. */
5386 
5387     if (Fop == OP_SCOND)
5388       {
5389       group_frame_type  = GF_NOCAPTURE | Fop;
5390       RMATCH(Fecode, RM35);
5391       RRETURN(rrc);
5392       }
5393     break;
5394 
5395 
5396 
5397 /* ========================================================================= */
5398 /*                  End of start of parenthesis opcodes                      */
5399 /* ========================================================================= */
5400 
5401 
5402     /* ===================================================================== */
5403     /* Move the subject pointer back. This occurs only at the start of each
5404     branch of a lookbehind assertion. If we are too close to the start to move
5405     back, fail. When working with UTF-8 we move back a number of characters,
5406     not bytes. */
5407 
5408     case OP_REVERSE:
5409     number = GET(Fecode, 1);
5410 #ifdef SUPPORT_UNICODE
5411     if (utf)
5412       {
5413       while (number-- > 0)
5414         {
5415         if (Feptr <= mb->start_subject) RRETURN(MATCH_NOMATCH);
5416         Feptr--;
5417         BACKCHAR(Feptr);
5418         }
5419       }
5420     else
5421 #endif
5422 
5423     /* No UTF support, or not in UTF mode: count is a code unit count */
5424 
5425       {
5426       if ((ptrdiff_t)number > Feptr - mb->start_subject) RRETURN(MATCH_NOMATCH);
5427       Feptr -= number;
5428       }
5429 
5430     /* Save the earliest consulted character, then skip to next opcode */
5431 
5432     if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr;
5433     Fecode += 1 + LINK_SIZE;
5434     break;
5435 
5436 
5437     /* ===================================================================== */
5438     /* An alternation is the end of a branch; scan along to find the end of the
5439     bracketed group. */
5440 
5441     case OP_ALT:
5442     do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
5443     break;
5444 
5445 
5446     /* ===================================================================== */
5447     /* The end of a parenthesized group. For all but OP_BRA and OP_COND, the
5448     starting frame was added to the chained frames in order to remember the
5449     starting subject position for the group. */
5450 
5451     case OP_KET:
5452     case OP_KETRMIN:
5453     case OP_KETRMAX:
5454     case OP_KETRPOS:
5455 
5456     bracode = Fecode - GET(Fecode, 1);
5457 
5458     /* Point N to the frame at the start of the most recent group.
5459     Remember the subject pointer at the start of the group. */
5460 
5461     if (*bracode != OP_BRA && *bracode != OP_COND)
5462       {
5463       N = (heapframe *)((char *)mb->match_frames + Flast_group_offset);
5464       P = (heapframe *)((char *)N - frame_size);
5465       Flast_group_offset = P->last_group_offset;
5466 
5467 #ifdef DEBUG_SHOW_RMATCH
5468       fprintf(stderr, "++ KET for frame=%d type=%x prev char offset=%lu\n",
5469         N->rdepth, N->group_frame_type,
5470         (char *)P->eptr - (char *)mb->start_subject);
5471 #endif
5472 
5473       /* If we are at the end of an assertion that is a condition, return a
5474       match, discarding any intermediate backtracking points. Copy back the
5475       captures into the frame before N so that they are set on return. Doing
5476       this for all assertions, both positive and negative, seems to match what
5477       Perl does. */
5478 
5479       if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT)
5480         {
5481         memcpy((char *)P + offsetof(heapframe, ovector), Fovector,
5482           Foffset_top * sizeof(PCRE2_SIZE));
5483         P->offset_top = Foffset_top;
5484         Fback_frame = (char *)F - (char *)P;
5485         RRETURN(MATCH_MATCH);
5486         }
5487       }
5488     else P = NULL;   /* Indicates starting frame not recorded */
5489 
5490     /* The group was not a conditional assertion. */
5491 
5492     switch (*bracode)
5493       {
5494       case OP_BRA:    /* No need to do anything for these */
5495       case OP_COND:
5496       case OP_SCOND:
5497       break;
5498 
5499       /* Positive assertions are like OP_ONCE, except that in addition the
5500       subject pointer must be put back to where it was at the start of the
5501       assertion. */
5502 
5503       case OP_ASSERT:
5504       case OP_ASSERTBACK:
5505       if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
5506       Feptr = P->eptr;
5507       /* Fall through */
5508 
5509       /* For an atomic group, discard internal backtracking points. We must
5510       also ensure that any remaining branches within the top-level of the group
5511       are not tried. Do this by adjusting the code pointer within the backtrack
5512       frame so that it points to the final branch. */
5513 
5514       case OP_ONCE:
5515       Fback_frame = ((char *)F - (char *)P);
5516       for (;;)
5517         {
5518         uint32_t y = GET(P->ecode,1);
5519         if ((P->ecode)[y] != OP_ALT) break;
5520         P->ecode += y;
5521         }
5522       break;
5523 
5524       /* A matching negative assertion returns MATCH, which is turned into
5525       NOMATCH at the assertion level. */
5526 
5527       case OP_ASSERT_NOT:
5528       case OP_ASSERTBACK_NOT:
5529       RRETURN(MATCH_MATCH);
5530 
5531       /* At the end of a script run, apply the script-checking rules. This code
5532       will never be exercised if Unicode support is not compiled, because in
5533       that environment script runs cause an error at compile time. */
5534 
5535       case OP_SCRIPT_RUN:
5536       if (!PRIV(script_run)(P->eptr, Feptr, utf)) RRETURN(MATCH_NOMATCH);
5537       break;
5538 
5539       /* Whole-pattern recursion is coded as a recurse into group 0, so it
5540       won't be picked up here. Instead, we catch it when the OP_END is reached.
5541       Other recursion is handled here. */
5542 
5543       case OP_CBRA:
5544       case OP_CBRAPOS:
5545       case OP_SCBRA:
5546       case OP_SCBRAPOS:
5547       number = GET2(bracode, 1+LINK_SIZE);
5548 
5549       /* Handle a recursively called group. We reinstate the previous set of
5550       captures and then carry on after the recursion call. */
5551 
5552       if (Fcurrent_recurse == number)
5553         {
5554         P = (heapframe *)((char *)N - frame_size);
5555         memcpy((char *)F + offsetof(heapframe, ovector), P->ovector,
5556           P->offset_top * sizeof(PCRE2_SIZE));
5557         Foffset_top = P->offset_top;
5558         Fcapture_last = P->capture_last;
5559         Fcurrent_recurse = P->current_recurse;
5560         Fecode = P->ecode + 1 + LINK_SIZE;
5561         continue;  /* With next opcode */
5562         }
5563 
5564       /* Deal with actual capturing. */
5565 
5566       offset = (number << 1) - 2;
5567       Fcapture_last = number;
5568       Fovector[offset] = P->eptr - mb->start_subject;
5569       Fovector[offset+1] = Feptr - mb->start_subject;
5570       if (offset >= Foffset_top) Foffset_top = offset + 2;
5571       break;
5572       }  /* End actions relating to the starting opcode */
5573 
5574     /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
5575     and return the MATCH_KETRPOS. This makes it possible to do the repeats one
5576     at a time from the outer level. This must precede the empty string test -
5577     in this case that test is done at the outer level. */
5578 
5579     if (*Fecode == OP_KETRPOS)
5580       {
5581       memcpy((char *)P + offsetof(heapframe, eptr),
5582              (char *)F + offsetof(heapframe, eptr),
5583              frame_copy_size);
5584       RRETURN(MATCH_KETRPOS);
5585       }
5586 
5587     /* Handle the different kinds of closing brackets. A non-repeating ket
5588     needs no special action, just continuing at this level. This also happens
5589     for the repeating kets if the group matched no characters, in order to
5590     forcibly break infinite loops. Otherwise, the repeating kets try the rest
5591     of the pattern or restart from the preceding bracket, in the appropriate
5592     order. */
5593 
5594     if (Fop != OP_KET && (P == NULL || Feptr != P->eptr))
5595       {
5596       if (Fop == OP_KETRMIN)
5597         {
5598         RMATCH(Fecode + 1 + LINK_SIZE, RM6);
5599         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5600         Fecode -= GET(Fecode, 1);
5601         break;   /* End of ket processing */
5602         }
5603 
5604       /* Repeat the maximum number of times (KETRMAX) */
5605 
5606       RMATCH(bracode, RM7);
5607       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5608       }
5609 
5610     /* Carry on at this level for a non-repeating ket, or after matching an
5611     empty string, or after repeating for a maximum number of times. */
5612 
5613     Fecode += 1 + LINK_SIZE;
5614     break;
5615 
5616 
5617     /* ===================================================================== */
5618     /* Start and end of line assertions, not multiline mode. */
5619 
5620     case OP_CIRC:   /* Start of line, unless PCRE2_NOTBOL is set. */
5621     if (Feptr != mb->start_subject || (mb->moptions & PCRE2_NOTBOL) != 0)
5622       RRETURN(MATCH_NOMATCH);
5623     Fecode++;
5624     break;
5625 
5626     case OP_SOD:    /* Unconditional start of subject */
5627     if (Feptr != mb->start_subject) RRETURN(MATCH_NOMATCH);
5628     Fecode++;
5629     break;
5630 
5631     /* When PCRE2_NOTEOL is unset, assert before the subject end, or a
5632     terminating newline unless PCRE2_DOLLAR_ENDONLY is set. */
5633 
5634     case OP_DOLL:
5635     if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
5636     if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
5637 
5638     /* Fall through */
5639     /* Unconditional end of subject assertion (\z) */
5640 
5641     case OP_EOD:
5642     if (Feptr < mb->end_subject) RRETURN(MATCH_NOMATCH);
5643     SCHECK_PARTIAL();
5644     Fecode++;
5645     break;
5646 
5647     /* End of subject or ending \n assertion (\Z) */
5648 
5649     case OP_EODN:
5650     ASSERT_NL_OR_EOS:
5651     if (Feptr < mb->end_subject &&
5652         (!IS_NEWLINE(Feptr) || Feptr != mb->end_subject - mb->nllen))
5653       {
5654       if (mb->partial != 0 &&
5655           Feptr + 1 >= mb->end_subject &&
5656           NLBLOCK->nltype == NLTYPE_FIXED &&
5657           NLBLOCK->nllen == 2 &&
5658           UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
5659         {
5660         mb->hitend = TRUE;
5661         if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5662         }
5663       RRETURN(MATCH_NOMATCH);
5664       }
5665 
5666     /* Either at end of string or \n before end. */
5667 
5668     SCHECK_PARTIAL();
5669     Fecode++;
5670     break;
5671 
5672 
5673     /* ===================================================================== */
5674     /* Start and end of line assertions, multiline mode. */
5675 
5676     /* Start of subject unless notbol, or after any newline except for one at
5677     the very end, unless PCRE2_ALT_CIRCUMFLEX is set. */
5678 
5679     case OP_CIRCM:
5680     if ((mb->moptions & PCRE2_NOTBOL) != 0 && Feptr == mb->start_subject)
5681       RRETURN(MATCH_NOMATCH);
5682     if (Feptr != mb->start_subject &&
5683         ((Feptr == mb->end_subject &&
5684            (mb->poptions & PCRE2_ALT_CIRCUMFLEX) == 0) ||
5685          !WAS_NEWLINE(Feptr)))
5686       RRETURN(MATCH_NOMATCH);
5687     Fecode++;
5688     break;
5689 
5690     /* Assert before any newline, or before end of subject unless noteol is
5691     set. */
5692 
5693     case OP_DOLLM:
5694     if (Feptr < mb->end_subject)
5695       {
5696       if (!IS_NEWLINE(Feptr))
5697         {
5698         if (mb->partial != 0 &&
5699             Feptr + 1 >= mb->end_subject &&
5700             NLBLOCK->nltype == NLTYPE_FIXED &&
5701             NLBLOCK->nllen == 2 &&
5702             UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
5703           {
5704           mb->hitend = TRUE;
5705           if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5706           }
5707         RRETURN(MATCH_NOMATCH);
5708         }
5709       }
5710     else
5711       {
5712       if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
5713       SCHECK_PARTIAL();
5714       }
5715     Fecode++;
5716     break;
5717 
5718 
5719     /* ===================================================================== */
5720     /* Start of match assertion */
5721 
5722     case OP_SOM:
5723     if (Feptr != mb->start_subject + mb->start_offset) RRETURN(MATCH_NOMATCH);
5724     Fecode++;
5725     break;
5726 
5727 
5728     /* ===================================================================== */
5729     /* Reset the start of match point */
5730 
5731     case OP_SET_SOM:
5732     Fstart_match = Feptr;
5733     Fecode++;
5734     break;
5735 
5736 
5737     /* ===================================================================== */
5738     /* Word boundary assertions. Find out if the previous and current
5739     characters are "word" characters. It takes a bit more work in UTF mode.
5740     Characters > 255 are assumed to be "non-word" characters when PCRE2_UCP is
5741     not set. When it is set, use Unicode properties if available, even when not
5742     in UTF mode. Remember the earliest and latest consulted characters. */
5743 
5744     case OP_NOT_WORD_BOUNDARY:
5745     case OP_WORD_BOUNDARY:
5746     if (Feptr == mb->start_subject) prev_is_word = FALSE; else
5747       {
5748       PCRE2_SPTR lastptr = Feptr - 1;
5749 #ifdef SUPPORT_UNICODE
5750       if (utf)
5751         {
5752         BACKCHAR(lastptr);
5753         GETCHAR(fc, lastptr);
5754         }
5755       else
5756 #endif  /* SUPPORT_UNICODE */
5757       fc = *lastptr;
5758       if (lastptr < mb->start_used_ptr) mb->start_used_ptr = lastptr;
5759 #ifdef SUPPORT_UNICODE
5760       if ((mb->poptions & PCRE2_UCP) != 0)
5761         {
5762         if (fc == '_') prev_is_word = TRUE; else
5763           {
5764           int cat = UCD_CATEGORY(fc);
5765           prev_is_word = (cat == ucp_L || cat == ucp_N);
5766           }
5767         }
5768       else
5769 #endif  /* SUPPORT_UNICODE */
5770       prev_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
5771       }
5772 
5773     /* Get status of next character */
5774 
5775     if (Feptr >= mb->end_subject)
5776       {
5777       SCHECK_PARTIAL();
5778       cur_is_word = FALSE;
5779       }
5780     else
5781       {
5782       PCRE2_SPTR nextptr = Feptr + 1;
5783 #ifdef SUPPORT_UNICODE
5784       if (utf)
5785         {
5786         FORWARDCHARTEST(nextptr, mb->end_subject);
5787         GETCHAR(fc, Feptr);
5788         }
5789       else
5790 #endif  /* SUPPORT_UNICODE */
5791       fc = *Feptr;
5792       if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
5793 #ifdef SUPPORT_UNICODE
5794       if ((mb->poptions & PCRE2_UCP) != 0)
5795         {
5796         if (fc == '_') cur_is_word = TRUE; else
5797           {
5798           int cat = UCD_CATEGORY(fc);
5799           cur_is_word = (cat == ucp_L || cat == ucp_N);
5800           }
5801         }
5802       else
5803 #endif  /* SUPPORT_UNICODE */
5804       cur_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
5805       }
5806 
5807     /* Now see if the situation is what we want */
5808 
5809     if ((*Fecode++ == OP_WORD_BOUNDARY)?
5810          cur_is_word == prev_is_word : cur_is_word != prev_is_word)
5811       RRETURN(MATCH_NOMATCH);
5812     break;
5813 
5814 
5815     /* ===================================================================== */
5816     /* Backtracking (*VERB)s, with and without arguments. Note that if the
5817     pattern is successfully matched, we do not come back from RMATCH. */
5818 
5819     case OP_MARK:
5820     Fmark = mb->nomatch_mark = Fecode + 2;
5821     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM12);
5822 
5823     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
5824     argument, and we must check whether that argument matches this MARK's
5825     argument. It is passed back in mb->verb_skip_ptr. If it does match, we
5826     return MATCH_SKIP with mb->verb_skip_ptr now pointing to the subject
5827     position that corresponds to this mark. Otherwise, pass back the return
5828     code unaltered. */
5829 
5830     if (rrc == MATCH_SKIP_ARG &&
5831              PRIV(strcmp)(Fecode + 2, mb->verb_skip_ptr) == 0)
5832       {
5833       mb->verb_skip_ptr = Feptr;   /* Pass back current position */
5834       RRETURN(MATCH_SKIP);
5835       }
5836     RRETURN(rrc);
5837 
5838     case OP_FAIL:
5839     RRETURN(MATCH_NOMATCH);
5840 
5841     /* Record the current recursing group number in mb->verb_current_recurse
5842     when a backtracking return such as MATCH_COMMIT is given. This enables the
5843     recurse processing to catch verbs from within the recursion. */
5844 
5845     case OP_COMMIT:
5846     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM13);
5847     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5848     mb->verb_current_recurse = Fcurrent_recurse;
5849     RRETURN(MATCH_COMMIT);
5850 
5851     case OP_COMMIT_ARG:
5852     Fmark = mb->nomatch_mark = Fecode + 2;
5853     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM36);
5854     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5855     mb->verb_current_recurse = Fcurrent_recurse;
5856     RRETURN(MATCH_COMMIT);
5857 
5858     case OP_PRUNE:
5859     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM14);
5860     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5861     mb->verb_current_recurse = Fcurrent_recurse;
5862     RRETURN(MATCH_PRUNE);
5863 
5864     case OP_PRUNE_ARG:
5865     Fmark = mb->nomatch_mark = Fecode + 2;
5866     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM15);
5867     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5868     mb->verb_current_recurse = Fcurrent_recurse;
5869     RRETURN(MATCH_PRUNE);
5870 
5871     case OP_SKIP:
5872     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM16);
5873     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5874     mb->verb_skip_ptr = Feptr;   /* Pass back current position */
5875     mb->verb_current_recurse = Fcurrent_recurse;
5876     RRETURN(MATCH_SKIP);
5877 
5878     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
5879     nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
5880     not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
5881     that failed and any that precede it (either they also failed, or were not
5882     triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
5883     SKIP_ARG gets to top level, the match is re-run with mb->ignore_skip_arg
5884     set to the count of the one that failed. */
5885 
5886     case OP_SKIP_ARG:
5887     mb->skip_arg_count++;
5888     if (mb->skip_arg_count <= mb->ignore_skip_arg)
5889       {
5890       Fecode += PRIV(OP_lengths)[*Fecode] + Fecode[1];
5891       break;
5892       }
5893     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM17);
5894     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5895 
5896     /* Pass back the current skip name and return the special MATCH_SKIP_ARG
5897     return code. This will either be caught by a matching MARK, or get to the
5898     top, where it causes a rematch with mb->ignore_skip_arg set to the value of
5899     mb->skip_arg_count. */
5900 
5901     mb->verb_skip_ptr = Fecode + 2;
5902     mb->verb_current_recurse = Fcurrent_recurse;
5903     RRETURN(MATCH_SKIP_ARG);
5904 
5905     /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
5906     the branch in which it occurs can be determined. */
5907 
5908     case OP_THEN:
5909     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM18);
5910     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5911     mb->verb_ecode_ptr = Fecode;
5912     mb->verb_current_recurse = Fcurrent_recurse;
5913     RRETURN(MATCH_THEN);
5914 
5915     case OP_THEN_ARG:
5916     Fmark = mb->nomatch_mark = Fecode + 2;
5917     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM19);
5918     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5919     mb->verb_ecode_ptr = Fecode;
5920     mb->verb_current_recurse = Fcurrent_recurse;
5921     RRETURN(MATCH_THEN);
5922 
5923 
5924     /* ===================================================================== */
5925     /* There's been some horrible disaster. Arrival here can only mean there is
5926     something seriously wrong in the code above or the OP_xxx definitions. */
5927 
5928     default:
5929     return PCRE2_ERROR_INTERNAL;
5930     }
5931 
5932   /* Do not insert any code in here without much thought; it is assumed
5933   that "continue" in the code above comes out to here to repeat the main
5934   loop. */
5935 
5936   }  /* End of main loop */
5937 /* Control never reaches here */
5938 
5939 
5940 /* ========================================================================= */
5941 /* The RRETURN() macro jumps here. The number that is saved in Freturn_id
5942 indicates which label we actually want to return to. The value in Frdepth is
5943 the index number of the frame in the vector. The return value has been placed
5944 in rrc. */
5945 
5946 #define LBL(val) case val: goto L_RM##val;
5947 
5948 RETURN_SWITCH:
5949 if (Frdepth == 0) return rrc;                     /* Exit from the top level */
5950 F = (heapframe *)((char *)F - Fback_frame);       /* Backtrack */
5951 mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */
5952 
5953 #ifdef DEBUG_SHOW_RMATCH
5954 fprintf(stderr, "++ RETURN %d to %d\n", rrc, Freturn_id);
5955 #endif
5956 
5957 switch (Freturn_id)
5958   {
5959   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
5960   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
5961   LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
5962   LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
5963   LBL(33) LBL(34) LBL(35) LBL(36)
5964 
5965 #ifdef SUPPORT_WIDE_CHARS
5966   LBL(100) LBL(101)
5967 #endif
5968 
5969 #ifdef SUPPORT_UNICODE
5970   LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206)
5971   LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213)
5972   LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220)
5973   LBL(221) LBL(222)
5974 #endif
5975 
5976   default:
5977   return PCRE2_ERROR_INTERNAL;
5978   }
5979 #undef LBL
5980 }
5981 
5982 
5983 /*************************************************
5984 *           Match a Regular Expression           *
5985 *************************************************/
5986 
5987 /* This function applies a compiled pattern to a subject string and picks out
5988 portions of the string if it matches. Two elements in the vector are set for
5989 each substring: the offsets to the start and end of the substring.
5990 
5991 Arguments:
5992   code            points to the compiled expression
5993   subject         points to the subject string
5994   length          length of subject string (may contain binary zeros)
5995   start_offset    where to start in the subject string
5996   options         option bits
5997   match_data      points to a match_data block
5998   mcontext        points to a PCRE2 match context
5999 
6000 Returns:          > 0 => success; value is the number of ovector pairs filled
6001                   = 0 => success, but ovector is not big enough
6002                    -1 => failed to match (PCRE2_ERROR_NOMATCH)
6003                    -2 => partial match (PCRE2_ERROR_PARTIAL)
6004                  < -2 => some kind of unexpected problem
6005 */
6006 
6007 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_match(const pcre2_code * code,PCRE2_SPTR subject,PCRE2_SIZE length,PCRE2_SIZE start_offset,uint32_t options,pcre2_match_data * match_data,pcre2_match_context * mcontext)6008 pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
6009   PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
6010   pcre2_match_context *mcontext)
6011 {
6012 int rc;
6013 int was_zero_terminated = 0;
6014 const uint8_t *start_bits = NULL;
6015 const pcre2_real_code *re = (const pcre2_real_code *)code;
6016 
6017 
6018 BOOL anchored;
6019 BOOL firstline;
6020 BOOL has_first_cu = FALSE;
6021 BOOL has_req_cu = FALSE;
6022 BOOL startline;
6023 BOOL utf;
6024 
6025 PCRE2_UCHAR first_cu = 0;
6026 PCRE2_UCHAR first_cu2 = 0;
6027 PCRE2_UCHAR req_cu = 0;
6028 PCRE2_UCHAR req_cu2 = 0;
6029 
6030 PCRE2_SPTR bumpalong_limit;
6031 PCRE2_SPTR end_subject;
6032 PCRE2_SPTR start_match = subject + start_offset;
6033 PCRE2_SPTR req_cu_ptr = start_match - 1;
6034 PCRE2_SPTR start_partial = NULL;
6035 PCRE2_SPTR match_partial = NULL;
6036 
6037 PCRE2_SIZE frame_size;
6038 
6039 /* We need to have mb as a pointer to a match block, because the IS_NEWLINE
6040 macro is used below, and it expects NLBLOCK to be defined as a pointer. */
6041 
6042 pcre2_callout_block cb;
6043 match_block actual_match_block;
6044 match_block *mb = &actual_match_block;
6045 
6046 /* Allocate an initial vector of backtracking frames on the stack. If this
6047 proves to be too small, it is replaced by a larger one on the heap. To get a
6048 vector of the size required that is aligned for pointers, allocate it as a
6049 vector of pointers. */
6050 
6051 PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)]
6052     PCRE2_KEEP_UNINITIALIZED;
6053 mb->stack_frames = (heapframe *)stack_frames_vector;
6054 
6055 /* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
6056 subject string. */
6057 
6058 if (length == PCRE2_ZERO_TERMINATED)
6059   {
6060   length = PRIV(strlen)(subject);
6061   was_zero_terminated = 1;
6062   }
6063 end_subject = subject + length;
6064 
6065 /* Plausibility checks */
6066 
6067 if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
6068 if (code == NULL || subject == NULL || match_data == NULL)
6069   return PCRE2_ERROR_NULL;
6070 if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
6071 
6072 /* Check that the first field in the block is the magic number. */
6073 
6074 if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
6075 
6076 /* Check the code unit width. */
6077 
6078 if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
6079   return PCRE2_ERROR_BADMODE;
6080 
6081 /* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
6082 options variable for this function. Users of PCRE2 who are not calling the
6083 function directly would like to have a way of setting these flags, in the same
6084 way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
6085 constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
6086 (*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field, which we now
6087 transfer to the options for this function. The bits are guaranteed to be
6088 adjacent, but do not have the same values. This bit of Boolean trickery assumes
6089 that the match-time bits are not more significant than the flag bits. If by
6090 accident this is not the case, a compile-time division by zero error will
6091 occur. */
6092 
6093 #define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
6094 #define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
6095 options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
6096 #undef FF
6097 #undef OO
6098 
6099 /* These two settings are used in the code for checking a UTF string that
6100 follows immediately afterwards. Other values in the mb block are used only
6101 during interpretive processing, not when the JIT support is in use, so they are
6102 set up later. */
6103 
6104 utf = (re->overall_options & PCRE2_UTF) != 0;
6105 mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
6106               ((options & PCRE2_PARTIAL_SOFT) != 0)? 1 : 0;
6107 
6108 /* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same
6109 time. */
6110 
6111 if (mb->partial != 0 &&
6112    ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
6113   return PCRE2_ERROR_BADOPTION;
6114 
6115 /* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
6116 we must also check that a starting offset does not point into the middle of a
6117 multiunit character. We check only the portion of the subject that is going to
6118 be inspected during matching - from the offset minus the maximum lookbehind
6119 to the given length. This saves time when a small part of a large subject is
6120 being matched by the use of a starting offset. Note that the maximum lookbehind
6121 is a number of characters, not code units. */
6122 
6123 #ifdef SUPPORT_UNICODE
6124 if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
6125   {
6126   PCRE2_SPTR check_subject = start_match;  /* start_match includes offset */
6127 
6128   if (start_offset > 0)
6129     {
6130 #if PCRE2_CODE_UNIT_WIDTH != 32
6131     unsigned int i;
6132     if (start_match < end_subject && NOT_FIRSTCU(*start_match))
6133       return PCRE2_ERROR_BADUTFOFFSET;
6134     for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--)
6135       {
6136       check_subject--;
6137       while (check_subject > subject &&
6138 #if PCRE2_CODE_UNIT_WIDTH == 8
6139       (*check_subject & 0xc0) == 0x80)
6140 #else  /* 16-bit */
6141       (*check_subject & 0xfc00) == 0xdc00)
6142 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
6143         check_subject--;
6144       }
6145 #else
6146     /* In the 32-bit library, one code unit equals one character. However,
6147     we cannot just subtract the lookbehind and then compare pointers, because
6148     a very large lookbehind could create an invalid pointer. */
6149 
6150     if (start_offset >= re->max_lookbehind)
6151       check_subject -= re->max_lookbehind;
6152     else
6153       check_subject = subject;
6154 #endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6155     }
6156 
6157   /* Validate the relevant portion of the subject. After an error, adjust the
6158   offset to be an absolute offset in the whole string. */
6159 
6160   match_data->rc = PRIV(valid_utf)(check_subject,
6161     length - (check_subject - subject), &(match_data->startchar));
6162   if (match_data->rc != 0)
6163     {
6164     match_data->startchar += check_subject - subject;
6165     return match_data->rc;
6166     }
6167   }
6168 #endif  /* SUPPORT_UNICODE */
6169 
6170 /* It is an error to set an offset limit without setting the flag at compile
6171 time. */
6172 
6173 if (mcontext != NULL && mcontext->offset_limit != PCRE2_UNSET &&
6174      (re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
6175   return PCRE2_ERROR_BADOFFSETLIMIT;
6176 
6177 /* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,
6178 free the memory that was obtained. Set the field to NULL for no match cases. */
6179 
6180 if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
6181   {
6182   match_data->memctl.free((void *)match_data->subject,
6183     match_data->memctl.memory_data);
6184   match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
6185   }
6186 match_data->subject = NULL;
6187 
6188 /* If the pattern was successfully studied with JIT support, run the JIT
6189 executable instead of the rest of this function. Most options must be set at
6190 compile time for the JIT code to be usable. Fall back to the normal code path if
6191 an unsupported option is set or if JIT returns BADOPTION (which means that the
6192 selected normal or partial matching mode was not compiled). */
6193 
6194 #ifdef SUPPORT_JIT
6195 if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0)
6196   {
6197   rc = pcre2_jit_match(code, subject, length, start_offset, options,
6198     match_data, mcontext);
6199   if (rc != PCRE2_ERROR_JIT_BADOPTION)
6200     {
6201     if (rc >= 0 && (options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
6202       {
6203       length = CU2BYTES(length + was_zero_terminated);
6204       match_data->subject = match_data->memctl.malloc(length,
6205         match_data->memctl.memory_data);
6206       if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
6207       memcpy((void *)match_data->subject, subject, length);
6208       match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
6209       }
6210     return rc;
6211     }
6212   }
6213 #endif
6214 
6215 /* Carry on with non-JIT matching. A NULL match context means "use a default
6216 context", but we take the memory control functions from the pattern. */
6217 
6218 if (mcontext == NULL)
6219   {
6220   mcontext = (pcre2_match_context *)(&PRIV(default_match_context));
6221   mb->memctl = re->memctl;
6222   }
6223 else mb->memctl = mcontext->memctl;
6224 
6225 anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0;
6226 firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
6227 startline = (re->flags & PCRE2_STARTLINE) != 0;
6228 bumpalong_limit =  (mcontext->offset_limit == PCRE2_UNSET)?
6229   end_subject : subject + mcontext->offset_limit;
6230 
6231 /* Initialize and set up the fixed fields in the callout block, with a pointer
6232 in the match block. */
6233 
6234 mb->cb = &cb;
6235 cb.version = 2;
6236 cb.subject = subject;
6237 cb.subject_length = (PCRE2_SIZE)(end_subject - subject);
6238 cb.callout_flags = 0;
6239 
6240 /* Fill in the remaining fields in the match block. */
6241 
6242 mb->callout = mcontext->callout;
6243 mb->callout_data = mcontext->callout_data;
6244 
6245 mb->start_subject = subject;
6246 mb->start_offset = start_offset;
6247 mb->end_subject = end_subject;
6248 mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
6249 
6250 mb->moptions = options;                 /* Match options */
6251 mb->poptions = re->overall_options;     /* Pattern options */
6252 
6253 mb->ignore_skip_arg = 0;
6254 mb->mark = mb->nomatch_mark = NULL;     /* In case never set */
6255 mb->hitend = FALSE;
6256 
6257 /* The name table is needed for finding all the numbers associated with a
6258 given name, for condition testing. The code follows the name table. */
6259 
6260 mb->name_table = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code));
6261 mb->name_count = re->name_count;
6262 mb->name_entry_size = re->name_entry_size;
6263 mb->start_code = mb->name_table + re->name_count * re->name_entry_size;
6264 
6265 /* Process the \R and newline settings. */
6266 
6267 mb->bsr_convention = re->bsr_convention;
6268 mb->nltype = NLTYPE_FIXED;
6269 switch(re->newline_convention)
6270   {
6271   case PCRE2_NEWLINE_CR:
6272   mb->nllen = 1;
6273   mb->nl[0] = CHAR_CR;
6274   break;
6275 
6276   case PCRE2_NEWLINE_LF:
6277   mb->nllen = 1;
6278   mb->nl[0] = CHAR_NL;
6279   break;
6280 
6281   case PCRE2_NEWLINE_NUL:
6282   mb->nllen = 1;
6283   mb->nl[0] = CHAR_NUL;
6284   break;
6285 
6286   case PCRE2_NEWLINE_CRLF:
6287   mb->nllen = 2;
6288   mb->nl[0] = CHAR_CR;
6289   mb->nl[1] = CHAR_NL;
6290   break;
6291 
6292   case PCRE2_NEWLINE_ANY:
6293   mb->nltype = NLTYPE_ANY;
6294   break;
6295 
6296   case PCRE2_NEWLINE_ANYCRLF:
6297   mb->nltype = NLTYPE_ANYCRLF;
6298   break;
6299 
6300   default: return PCRE2_ERROR_INTERNAL;
6301   }
6302 
6303 /* The backtracking frames have fixed data at the front, and a PCRE2_SIZE
6304 vector at the end, whose size depends on the number of capturing parentheses in
6305 the pattern. It is not used at all if there are no capturing parentheses.
6306 
6307   frame_size             is the total size of each frame
6308   mb->frame_vector_size  is the total usable size of the vector (rounded down
6309                            to a whole number of frames)
6310 
6311 The last of these is changed within the match() function if the frame vector
6312 has to be expanded. We therefore put it into the match block so that it is
6313 correct when calling match() more than once for non-anchored patterns. */
6314 
6315 frame_size = offsetof(heapframe, ovector) +
6316   re->top_bracket * 2 * sizeof(PCRE2_SIZE);
6317 
6318 /* Limits set in the pattern override the match context only if they are
6319 smaller. */
6320 
6321 mb->heap_limit = (mcontext->heap_limit < re->limit_heap)?
6322   mcontext->heap_limit : re->limit_heap;
6323 
6324 mb->match_limit = (mcontext->match_limit < re->limit_match)?
6325   mcontext->match_limit : re->limit_match;
6326 
6327 mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
6328   mcontext->depth_limit : re->limit_depth;
6329 
6330 /* If a pattern has very many capturing parentheses, the frame size may be very
6331 large. Ensure that there are at least 10 available frames by getting an initial
6332 vector on the heap if necessary, except when the heap limit prevents this. Get
6333 fewer if possible. (The heap limit is in kibibytes.) */
6334 
6335 if (frame_size <= START_FRAMES_SIZE/10)
6336   {
6337   mb->match_frames = mb->stack_frames;   /* Initial frame vector on the stack */
6338   mb->frame_vector_size = ((START_FRAMES_SIZE/frame_size) * frame_size);
6339   }
6340 else
6341   {
6342   mb->frame_vector_size = frame_size * 10;
6343   if ((mb->frame_vector_size / 1024) > mb->heap_limit)
6344     {
6345     if (frame_size > mb->heap_limit * 1024) return PCRE2_ERROR_HEAPLIMIT;
6346     mb->frame_vector_size = ((mb->heap_limit * 1024)/frame_size) * frame_size;
6347     }
6348   mb->match_frames = mb->memctl.malloc(mb->frame_vector_size,
6349     mb->memctl.memory_data);
6350   if (mb->match_frames == NULL) return PCRE2_ERROR_NOMEMORY;
6351   }
6352 
6353 mb->match_frames_top =
6354   (heapframe *)((char *)mb->match_frames + mb->frame_vector_size);
6355 
6356 /* Write to the ovector within the first frame to mark every capture unset and
6357 to avoid uninitialized memory read errors when it is copied to a new frame. */
6358 
6359 memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff,
6360   re->top_bracket * 2 * sizeof(PCRE2_SIZE));
6361 
6362 /* Pointers to the individual character tables */
6363 
6364 mb->lcc = re->tables + lcc_offset;
6365 mb->fcc = re->tables + fcc_offset;
6366 mb->ctypes = re->tables + ctypes_offset;
6367 
6368 /* Set up the first code unit to match, if available. If there's no first code
6369 unit there may be a bitmap of possible first characters. */
6370 
6371 if ((re->flags & PCRE2_FIRSTSET) != 0)
6372   {
6373   has_first_cu = TRUE;
6374   first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
6375   if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
6376     {
6377     first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
6378 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
6379     if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu);
6380 #endif
6381     }
6382   }
6383 else
6384   if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
6385     start_bits = re->start_bitmap;
6386 
6387 /* There may also be a "last known required character" set. */
6388 
6389 if ((re->flags & PCRE2_LASTSET) != 0)
6390   {
6391   has_req_cu = TRUE;
6392   req_cu = req_cu2 = (PCRE2_UCHAR)(re->last_codeunit);
6393   if ((re->flags & PCRE2_LASTCASELESS) != 0)
6394     {
6395     req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu);
6396 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
6397     if (utf && req_cu > 127) req_cu2 = UCD_OTHERCASE(req_cu);
6398 #endif
6399     }
6400   }
6401 
6402 
6403 /* ==========================================================================*/
6404 
6405 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
6406 the loop runs just once. */
6407 
6408 for(;;)
6409   {
6410   PCRE2_SPTR new_start_match;
6411 
6412   /* ----------------- Start of match optimizations ---------------- */
6413 
6414   /* There are some optimizations that avoid running the match if a known
6415   starting point is not found, or if a known later code unit is not present.
6416   However, there is an option (settable at compile time) that disables these,
6417   for testing and for ensuring that all callouts do actually occur. */
6418 
6419   if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
6420     {
6421     /* If firstline is TRUE, the start of the match is constrained to the first
6422     line of a multiline string. That is, the match must be before or at the
6423     first newline following the start of matching. Temporarily adjust
6424     end_subject so that we stop the scans for a first code unit at a newline.
6425     If the match fails at the newline, later code breaks the loop. */
6426 
6427     if (firstline)
6428       {
6429       PCRE2_SPTR t = start_match;
6430 #ifdef SUPPORT_UNICODE
6431       if (utf)
6432         {
6433         while (t < end_subject && !IS_NEWLINE(t))
6434           {
6435           t++;
6436           ACROSSCHAR(t < end_subject, t, t++);
6437           }
6438         }
6439       else
6440 #endif
6441       while (t < end_subject && !IS_NEWLINE(t)) t++;
6442       end_subject = t;
6443       }
6444 
6445     /* Anchored: check the first code unit if one is recorded. This may seem
6446     pointless but it can help in detecting a no match case without scanning for
6447     the required code unit. */
6448 
6449     if (anchored)
6450       {
6451       if (has_first_cu || start_bits != NULL)
6452         {
6453         BOOL ok = start_match < end_subject;
6454         if (ok)
6455           {
6456           PCRE2_UCHAR c = UCHAR21TEST(start_match);
6457           ok = has_first_cu && (c == first_cu || c == first_cu2);
6458           if (!ok && start_bits != NULL)
6459             {
6460 #if PCRE2_CODE_UNIT_WIDTH != 8
6461             if (c > 255) c = 255;
6462 #endif
6463             ok = (start_bits[c/8] & (1u << (c&7))) != 0;
6464             }
6465           }
6466         if (!ok)
6467           {
6468           rc = MATCH_NOMATCH;
6469           break;
6470           }
6471         }
6472       }
6473 
6474     /* Not anchored. Advance to a unique first code unit if there is one. In
6475     8-bit mode, the use of memchr() gives a big speed up, even though we have
6476     to call it twice in caseless mode, in order to find the earliest occurrence
6477     of the character in either of its cases. */
6478 
6479     else
6480       {
6481       if (has_first_cu)
6482         {
6483         if (first_cu != first_cu2)  /* Caseless */
6484           {
6485 #if PCRE2_CODE_UNIT_WIDTH != 8
6486           PCRE2_UCHAR smc;
6487           while (start_match < end_subject &&
6488                 (smc = UCHAR21TEST(start_match)) != first_cu &&
6489                   smc != first_cu2)
6490             start_match++;
6491 #else  /* 8-bit code units */
6492           PCRE2_SPTR pp1 =
6493             memchr(start_match, first_cu, end_subject-start_match);
6494           PCRE2_SPTR pp2 =
6495             memchr(start_match, first_cu2, end_subject-start_match);
6496           if (pp1 == NULL)
6497             start_match = (pp2 == NULL)? end_subject : pp2;
6498           else
6499             start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
6500 #endif
6501           }
6502 
6503         /* The caseful case */
6504 
6505         else
6506           {
6507 #if PCRE2_CODE_UNIT_WIDTH != 8
6508           while (start_match < end_subject && UCHAR21TEST(start_match) !=
6509                  first_cu)
6510             start_match++;
6511 #else
6512           start_match = memchr(start_match, first_cu, end_subject - start_match);
6513           if (start_match == NULL) start_match = end_subject;
6514 #endif
6515           }
6516 
6517         /* If we can't find the first code unit, having reached the true end
6518         of the subject, break the bumpalong loop, to force a match failure,
6519         except when doing partial matching, when we let the next cycle run at
6520         the end of the subject. To see why, consider the pattern /(?<=abc)def/,
6521         which partially matches "abc", even though the string does not contain
6522         the starting character "d". If we have not reached the true end of the
6523         subject (PCRE2_FIRSTLINE caused end_subject to be temporarily modified)
6524         we also let the cycle run, because the matching string is legitimately
6525         allowed to start with the first code unit of a newline. */
6526 
6527         if (!mb->partial && start_match >= mb->end_subject)
6528           {
6529           rc = MATCH_NOMATCH;
6530           break;
6531           }
6532         }
6533 
6534       /* If there's no first code unit, advance to just after a linebreak for a
6535       multiline match if required. */
6536 
6537       else if (startline)
6538         {
6539         if (start_match > mb->start_subject + start_offset)
6540           {
6541 #ifdef SUPPORT_UNICODE
6542           if (utf)
6543             {
6544             while (start_match < end_subject && !WAS_NEWLINE(start_match))
6545               {
6546               start_match++;
6547               ACROSSCHAR(start_match < end_subject, start_match, start_match++);
6548               }
6549             }
6550           else
6551 #endif
6552           while (start_match < end_subject && !WAS_NEWLINE(start_match))
6553             start_match++;
6554 
6555           /* If we have just passed a CR and the newline option is ANY or
6556           ANYCRLF, and we are now at a LF, advance the match position by one
6557           more code unit. */
6558 
6559           if (start_match[-1] == CHAR_CR &&
6560                (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
6561                start_match < end_subject &&
6562                UCHAR21TEST(start_match) == CHAR_NL)
6563             start_match++;
6564           }
6565         }
6566 
6567       /* If there's no first code unit or a requirement for a multiline line
6568       start, advance to a non-unique first code unit if any have been
6569       identified. The bitmap contains only 256 bits. When code units are 16 or
6570       32 bits wide, all code units greater than 254 set the 255 bit. */
6571 
6572       else if (start_bits != NULL)
6573         {
6574         while (start_match < end_subject)
6575           {
6576           uint32_t c = UCHAR21TEST(start_match);
6577 #if PCRE2_CODE_UNIT_WIDTH != 8
6578           if (c > 255) c = 255;
6579 #endif
6580           if ((start_bits[c/8] & (1u << (c&7))) != 0) break;
6581           start_match++;
6582           }
6583 
6584         /* See comment above in first_cu checking about the next few lines. */
6585 
6586         if (!mb->partial && start_match >= mb->end_subject)
6587           {
6588           rc = MATCH_NOMATCH;
6589           break;
6590           }
6591         }
6592       }   /* End first code unit handling */
6593 
6594     /* Restore fudged end_subject */
6595 
6596     end_subject = mb->end_subject;
6597 
6598     /* The following two optimizations must be disabled for partial matching. */
6599 
6600     if (!mb->partial)
6601       {
6602       /* The minimum matching length is a lower bound; no string of that length
6603       may actually match the pattern. Although the value is, strictly, in
6604       characters, we treat it as code units to avoid spending too much time in
6605       this optimization. */
6606 
6607       if (end_subject - start_match < re->minlength)
6608         {
6609         rc = MATCH_NOMATCH;
6610         break;
6611         }
6612 
6613       /* If req_cu is set, we know that that code unit must appear in the
6614       subject for the (non-partial) match to succeed. If the first code unit is
6615       set, req_cu must be later in the subject; otherwise the test starts at
6616       the match point. This optimization can save a huge amount of backtracking
6617       in patterns with nested unlimited repeats that aren't going to match.
6618       Writing separate code for caseful/caseless versions makes it go faster,
6619       as does using an autoincrement and backing off on a match. As in the case
6620       of the first code unit, using memchr() in the 8-bit library gives a big
6621       speed up. Unlike the first_cu check above, we do not need to call
6622       memchr() twice in the caseless case because we only need to check for the
6623       presence of the character in either case, not find the first occurrence.
6624 
6625       HOWEVER: when the subject string is very, very long, searching to its end
6626       can take a long time, and give bad performance on quite ordinary
6627       patterns. This showed up when somebody was matching something like
6628       /^\d+C/ on a 32-megabyte string... so we don't do this when the string is
6629       sufficiently long. */
6630 
6631       if (has_req_cu && end_subject - start_match < REQ_CU_MAX)
6632         {
6633         PCRE2_SPTR p = start_match + (has_first_cu? 1:0);
6634 
6635         /* We don't need to repeat the search if we haven't yet reached the
6636         place we found it last time round the bumpalong loop. */
6637 
6638         if (p > req_cu_ptr)
6639           {
6640           if (p < end_subject)
6641             {
6642             if (req_cu != req_cu2)  /* Caseless */
6643               {
6644 #if PCRE2_CODE_UNIT_WIDTH != 8
6645               do
6646                 {
6647                 uint32_t pp = UCHAR21INCTEST(p);
6648                 if (pp == req_cu || pp == req_cu2) { p--; break; }
6649                 }
6650               while (p < end_subject);
6651 
6652 #else  /* 8-bit code units */
6653               PCRE2_SPTR pp = p;
6654               p = memchr(pp, req_cu, end_subject - pp);
6655               if (p == NULL)
6656                 {
6657                 p = memchr(pp, req_cu2, end_subject - pp);
6658                 if (p == NULL) p = end_subject;
6659                 }
6660 #endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
6661               }
6662 
6663             /* The caseful case */
6664 
6665             else
6666               {
6667 #if PCRE2_CODE_UNIT_WIDTH != 8
6668               do
6669                 {
6670                 if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
6671                 }
6672               while (p < end_subject);
6673 
6674 #else  /* 8-bit code units */
6675               p = memchr(p, req_cu, end_subject - p);
6676               if (p == NULL) p = end_subject;
6677 #endif
6678               }
6679             }
6680 
6681           /* If we can't find the required code unit, break the bumpalong loop,
6682           forcing a match failure. */
6683 
6684           if (p >= end_subject)
6685             {
6686             rc = MATCH_NOMATCH;
6687             break;
6688             }
6689 
6690           /* If we have found the required code unit, save the point where we
6691           found it, so that we don't search again next time round the bumpalong
6692           loop if the start hasn't yet passed this code unit. */
6693 
6694           req_cu_ptr = p;
6695           }
6696         }
6697       }
6698     }
6699 
6700   /* ------------ End of start of match optimizations ------------ */
6701 
6702   /* Give no match if we have passed the bumpalong limit. */
6703 
6704   if (start_match > bumpalong_limit)
6705     {
6706     rc = MATCH_NOMATCH;
6707     break;
6708     }
6709 
6710   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6711   first starting point for which a partial match was found. */
6712 
6713   cb.start_match = (PCRE2_SIZE)(start_match - subject);
6714   cb.callout_flags |= PCRE2_CALLOUT_STARTMATCH;
6715 
6716   mb->start_used_ptr = start_match;
6717   mb->last_used_ptr = start_match;
6718   mb->match_call_count = 0;
6719   mb->end_offset_top = 0;
6720   mb->skip_arg_count = 0;
6721 
6722   rc = match(start_match, mb->start_code, match_data->ovector,
6723     match_data->oveccount, re->top_bracket, frame_size, mb);
6724 
6725   if (mb->hitend && start_partial == NULL)
6726     {
6727     start_partial = mb->start_used_ptr;
6728     match_partial = start_match;
6729     }
6730 
6731   switch(rc)
6732     {
6733     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6734     the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
6735     entirely. The only way we can do that is to re-do the match at the same
6736     point, with a flag to force SKIP with an argument to be ignored. Just
6737     treating this case as NOMATCH does not work because it does not check other
6738     alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
6739 
6740     case MATCH_SKIP_ARG:
6741     new_start_match = start_match;
6742     mb->ignore_skip_arg = mb->skip_arg_count;
6743     break;
6744 
6745     /* SKIP passes back the next starting point explicitly, but if it is no
6746     greater than the match we have just done, treat it as NOMATCH. */
6747 
6748     case MATCH_SKIP:
6749     if (mb->verb_skip_ptr > start_match)
6750       {
6751       new_start_match = mb->verb_skip_ptr;
6752       break;
6753       }
6754     /* Fall through */
6755 
6756     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6757     exactly like PRUNE. Unset ignore SKIP-with-argument. */
6758 
6759     case MATCH_NOMATCH:
6760     case MATCH_PRUNE:
6761     case MATCH_THEN:
6762     mb->ignore_skip_arg = 0;
6763     new_start_match = start_match + 1;
6764 #ifdef SUPPORT_UNICODE
6765     if (utf)
6766       ACROSSCHAR(new_start_match < end_subject, new_start_match,
6767         new_start_match++);
6768 #endif
6769     break;
6770 
6771     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6772 
6773     case MATCH_COMMIT:
6774     rc = MATCH_NOMATCH;
6775     goto ENDLOOP;
6776 
6777     /* Any other return is either a match, or some kind of error. */
6778 
6779     default:
6780     goto ENDLOOP;
6781     }
6782 
6783   /* Control reaches here for the various types of "no match at this point"
6784   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
6785 
6786   rc = MATCH_NOMATCH;
6787 
6788   /* If PCRE2_FIRSTLINE is set, the match must happen before or at the first
6789   newline in the subject (though it may continue over the newline). Therefore,
6790   if we have just failed to match, starting at a newline, do not continue. */
6791 
6792   if (firstline && IS_NEWLINE(start_match)) break;
6793 
6794   /* Advance to new matching position */
6795 
6796   start_match = new_start_match;
6797 
6798   /* Break the loop if the pattern is anchored or if we have passed the end of
6799   the subject. */
6800 
6801   if (anchored || start_match > end_subject) break;
6802 
6803   /* If we have just passed a CR and we are now at a LF, and the pattern does
6804   not contain any explicit matches for \r or \n, and the newline option is CRLF
6805   or ANY or ANYCRLF, advance the match position by one more code unit. In
6806   normal matching start_match will always be greater than the first position at
6807   this stage, but a failed *SKIP can cause a return at the same point, which is
6808   why the first test exists. */
6809 
6810   if (start_match > subject + start_offset &&
6811       start_match[-1] == CHAR_CR &&
6812       start_match < end_subject &&
6813       *start_match == CHAR_NL &&
6814       (re->flags & PCRE2_HASCRORLF) == 0 &&
6815         (mb->nltype == NLTYPE_ANY ||
6816          mb->nltype == NLTYPE_ANYCRLF ||
6817          mb->nllen == 2))
6818     start_match++;
6819 
6820   mb->mark = NULL;   /* Reset for start of next match attempt */
6821   }                  /* End of for(;;) "bumpalong" loop */
6822 
6823 /* ==========================================================================*/
6824 
6825 /* When we reach here, one of the following stopping conditions is true:
6826 
6827 (1) The match succeeded, either completely, or partially;
6828 
6829 (2) The pattern is anchored or the match was failed after (*COMMIT);
6830 
6831 (3) We are past the end of the subject or the bumpalong limit;
6832 
6833 (4) PCRE2_FIRSTLINE is set and we have failed to match at a newline, because
6834     this option requests that a match occur at or before the first newline in
6835     the subject.
6836 
6837 (5) Some kind of error occurred.
6838 
6839 */
6840 
6841 ENDLOOP:
6842 
6843 /* Release an enlarged frame vector that is on the heap. */
6844 
6845 if (mb->match_frames != mb->stack_frames)
6846   mb->memctl.free(mb->match_frames, mb->memctl.memory_data);
6847 
6848 /* Fill in fields that are always returned in the match data. */
6849 
6850 match_data->code = re;
6851 match_data->mark = mb->mark;
6852 match_data->matchedby = PCRE2_MATCHEDBY_INTERPRETER;
6853 
6854 /* Handle a fully successful match. Set the return code to the number of
6855 captured strings, or 0 if there were too many to fit into the ovector, and then
6856 set the remaining returned values before returning. Make a copy of the subject
6857 string if requested. */
6858 
6859 if (rc == MATCH_MATCH)
6860   {
6861   match_data->rc = ((int)mb->end_offset_top >= 2 * match_data->oveccount)?
6862     0 : (int)mb->end_offset_top/2 + 1;
6863   match_data->startchar = start_match - subject;
6864   match_data->leftchar = mb->start_used_ptr - subject;
6865   match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
6866     mb->last_used_ptr : mb->end_match_ptr) - subject;
6867   if ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
6868     {
6869     length = CU2BYTES(length + was_zero_terminated);
6870     match_data->subject = match_data->memctl.malloc(length,
6871       match_data->memctl.memory_data);
6872     if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
6873     memcpy((void *)match_data->subject, subject, length);
6874     match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
6875     }
6876   else match_data->subject = subject;
6877   return match_data->rc;
6878   }
6879 
6880 /* Control gets here if there has been a partial match, an error, or if the
6881 overall match attempt has failed at all permitted starting positions. Any mark
6882 data is in the nomatch_mark field. */
6883 
6884 match_data->mark = mb->nomatch_mark;
6885 
6886 /* For anything other than nomatch or partial match, just return the code. */
6887 
6888 if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL) match_data->rc = rc;
6889 
6890 /* Handle a partial match. If a "soft" partial match was requested, searching
6891 for a complete match will have continued, and the value of rc at this point
6892 will be MATCH_NOMATCH. For a "hard" partial match, it will already be
6893 PCRE2_ERROR_PARTIAL. */
6894 
6895 else if (match_partial != NULL)
6896   {
6897   match_data->subject = subject;
6898   match_data->ovector[0] = match_partial - subject;
6899   match_data->ovector[1] = end_subject - subject;
6900   match_data->startchar = match_partial - subject;
6901   match_data->leftchar = start_partial - subject;
6902   match_data->rightchar = end_subject - subject;
6903   match_data->rc = PCRE2_ERROR_PARTIAL;
6904   }
6905 
6906 /* Else this is the classic nomatch case. */
6907 
6908 else match_data->rc = PCRE2_ERROR_NOMATCH;
6909 
6910 return match_data->rc;
6911 }
6912 
6913 /* End of pcre2_match.c */
6914