• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10           New API code Copyright (c) 2015-2020 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45 
46 /* These defines enable debugging code */
47 
48 /* #define DEBUG_FRAMES_DISPLAY */
49 /* #define DEBUG_SHOW_OPS */
50 /* #define DEBUG_SHOW_RMATCH */
51 
/* <stdarg.h> is needed only by display_frames(), which is compiled when
DEBUG_FRAMES_DISPLAY is defined. The guard here must test exactly the same
macro name, otherwise va_list is undeclared when the debug code is enabled. */

#ifdef DEBUG_FRAMES_DISPLAY
#include <stdarg.h>
#endif
55 
56 /* These defines identify the name of the block containing "static"
57 information, and fields within it. */
58 
59 #define NLBLOCK mb              /* Block containing newline information */
60 #define PSSTART start_subject   /* Field containing processed string start */
61 #define PSEND   end_subject     /* Field containing processed string end */
62 
63 #include "pcre2_internal.h"
64 
/* Value held in a frame's current_recurse field when no pattern recursion is
in progress. It is bigger than the maximum group number. */

#define RECURSE_UNSET 0xffffffffu

/* Masks of the public option bits that are permitted at match time. The
second mask has "JIT" in its name and omits the anchoring options and
PCRE2_NO_JIT. */

#define PUBLIC_MATCH_OPTIONS \
  (PCRE2_ANCHORED| \
   PCRE2_ENDANCHORED| \
   PCRE2_NOTBOL| \
   PCRE2_NOTEOL| \
   PCRE2_NOTEMPTY| \
   PCRE2_NOTEMPTY_ATSTART| \
   PCRE2_NO_UTF_CHECK| \
   PCRE2_PARTIAL_HARD| \
   PCRE2_PARTIAL_SOFT| \
   PCRE2_NO_JIT| \
   PCRE2_COPY_MATCHED_SUBJECT)

#define PUBLIC_JIT_MATCH_OPTIONS \
  (PCRE2_NO_UTF_CHECK| \
   PCRE2_NOTBOL| \
   PCRE2_NOTEOL| \
   PCRE2_NOTEMPTY| \
   PCRE2_NOTEMPTY_ATSTART| \
   PCRE2_PARTIAL_SOFT| \
   PCRE2_PARTIAL_HARD| \
   PCRE2_COPY_MATCHED_SUBJECT)
78 
/* Values returned from, and passed around inside, the match() function.

The two ordinary results are non-negative. Error returns are the externally
defined PCRE2_ERROR_xxx codes, which are all negative. */

#define MATCH_NOMATCH      0
#define MATCH_MATCH        1

/* Special internal results. They are made sufficiently negative to avoid
clashing with the external error codes. */

#define MATCH_ACCEPT       (-999)
#define MATCH_KETRPOS      (-998)

/* The five backtracking-verb results must be kept together and in sequence so
that a test for "any one of them" can be written as a range check. Each value
is therefore derived from MATCH_COMMIT, the lowest of the five. */

#define MATCH_COMMIT       (-997)
#define MATCH_PRUNE        (MATCH_COMMIT + 1)
#define MATCH_SKIP         (MATCH_COMMIT + 2)
#define MATCH_SKIP_ARG     (MATCH_COMMIT + 3)
#define MATCH_THEN         (MATCH_COMMIT + 4)

/* Inclusive bounds of that range. */

#define MATCH_BACKTRACK_MIN MATCH_COMMIT
#define MATCH_BACKTRACK_MAX MATCH_THEN
99 
/* A frame's group_frame_type is zero when the frame is not a group frame.
Otherwise the upper 16 bits identify the kind of group and the lower 16 bits
carry data for it (e.g. the capture number). Group frames are used for most
groups so that information about the start of the group is easily available at
its end, without scanning back through the intermediate frames (backtrack
points). */

/* Split a group frame type into its identity and data parts. */

#define GF_IDMASK(a)   ((a) & 0xffff0000u)
#define GF_DATAMASK(a) ((a) & 0x0000ffffu)

/* The identity values. */

#define GF_CAPTURE     0x00010000u
#define GF_NOCAPTURE   0x00020000u
#define GF_CONDASSERT  0x00030000u
#define GF_RECURSE     0x00040000u
115 
/* The kinds of repetition: minimizing, maximizing, and possessive. */

enum { REPTYPE_MIN, REPTYPE_MAX, REPTYPE_POS };

/* Minimum repeat counts for the common repeat operators. The two range
entries are dummy placefillers. */

static const uint32_t rep_min[] = {
  0,            /* *  */
  0,            /* *? */
  1,            /* +  */
  1,            /* +? */
  0,            /* ?  */
  0,            /* ?? */
  0,            /* placefiller for OP_CRRANGE */
  0,            /* placefiller for OP_CRMINRANGE */
  0,            /* OP_CRPOSSTAR */
  1,            /* OP_CRPOSPLUS */
  0             /* OP_CRPOSQUERY */
};

/* Maximum repeat counts for the same operators; UINT32_MAX means infinity.
Again the two range entries are dummy placefillers. */

static const uint32_t rep_max[] = {
  UINT32_MAX,   /* *  */
  UINT32_MAX,   /* *? */
  UINT32_MAX,   /* +  */
  UINT32_MAX,   /* +? */
  1,            /* ?  */
  1,            /* ?? */
  0,            /* placefiller for OP_CRRANGE */
  0,            /* placefiller for OP_CRMINRANGE */
  UINT32_MAX,   /* OP_CRPOSSTAR */
  UINT32_MAX,   /* OP_CRPOSPLUS */
  1             /* OP_CRPOSQUERY */
};

/* Repetition type for each operator. Unlike the two tables above, this one
has a final entry for OP_CRPOSRANGE. */

static const uint32_t rep_typ[] = {
  REPTYPE_MAX,  /* *  */
  REPTYPE_MIN,  /* *? */
  REPTYPE_MAX,  /* +  */
  REPTYPE_MIN,  /* +? */
  REPTYPE_MAX,  /* ?  */
  REPTYPE_MIN,  /* ?? */
  REPTYPE_MAX,  /* OP_CRRANGE */
  REPTYPE_MIN,  /* OP_CRMINRANGE */
  REPTYPE_POS,  /* OP_CRPOSSTAR */
  REPTYPE_POS,  /* OP_CRPOSPLUS */
  REPTYPE_POS,  /* OP_CRPOSQUERY */
  REPTYPE_POS   /* OP_CRPOSRANGE */
};
146 
/* Identifying numbers for the RMATCH calls at backtracking points. Whenever
these lists are changed, the code at RETURN_SWITCH in match() must be updated
to stay in sync. */

enum {
  RM1 = 1,
         RM2,  RM3,  RM4,  RM5,  RM6,
  RM7,   RM8,  RM9,  RM10, RM11, RM12,
  RM13,  RM14, RM15, RM16, RM17, RM18,
  RM19,  RM20, RM21, RM22, RM23, RM24,
  RM25,  RM26, RM27, RM28, RM29, RM30,
  RM31,  RM32, RM33, RM34, RM35, RM36
};

#ifdef SUPPORT_WIDE_CHARS
enum {
  RM100 = 100,
  RM101
};
#endif

#ifdef SUPPORT_UNICODE
enum {
  RM200 = 200,
         RM201, RM202, RM203, RM204, RM205,
  RM206, RM207, RM208, RM209, RM210, RM211,
  RM212, RM213, RM214, RM215, RM216, RM217,
  RM218, RM219, RM220, RM221, RM222
};
#endif
164 
/* Short names for the general fields of the current backtracking frame. The
current frame is always pointed to by the variable F, so each name below is
simply F->field. The occasional reference to a field in some other frame is
written out in full. The frame also has some fields whose names start with
"temp"; these provide short-term, localised backtracking memory, and are
#defined with Lxxx names at the point of use and undefined afterwards. The
list is in alphabetical order. */

#define Fback_frame        F->back_frame
#define Fcapture_last      F->capture_last
#define Fcurrent_recurse   F->current_recurse
#define Fecode             F->ecode
#define Feptr              F->eptr
#define Fgroup_frame_type  F->group_frame_type
#define Flast_group_offset F->last_group_offset
#define Flength            F->length
#define Fmark              F->mark
#define Foccu              F->occu
#define Foffset_top        F->offset_top
#define Fop                F->op
#define Fovector           F->ovector
#define Frdepth            F->rdepth
#define Freturn_id         F->return_id
#define Fstart_match       F->start_match
188 
189 
190 #ifdef DEBUG_FRAMES_DISPLAY
191 /*************************************************
192 *      Display current frames and contents       *
193 *************************************************/
194 
195 /* This debugging function displays the current set of frames and their
196 contents. It is not called automatically from anywhere, the intention being
197 that calls can be inserted where necessary when debugging frame-related
198 problems.
199 
200 Arguments:
201   f           the file to write to
202   F           the current top frame
203   P           a previous frame of interest
204   frame_size  the frame size
205   mb          points to the match block
206   s           identification text
207 
208 Returns:    nothing
209 */
210 
211 static void
display_frames(FILE * f,heapframe * F,heapframe * P,PCRE2_SIZE frame_size,match_block * mb,const char * s,...)212 display_frames(FILE *f, heapframe *F, heapframe *P, PCRE2_SIZE frame_size,
213   match_block *mb, const char *s, ...)
214 {
215 uint32_t i;
216 heapframe *Q;
217 va_list ap;
218 va_start(ap, s);
219 
220 fprintf(f, "FRAMES ");
221 vfprintf(f, s, ap);
222 va_end(ap);
223 
224 if (P != NULL) fprintf(f, " P=%lu",
225   ((char *)P - (char *)(mb->match_frames))/frame_size);
226 fprintf(f, "\n");
227 
228 for (i = 0, Q = mb->match_frames;
229      Q <= F;
230      i++, Q = (heapframe *)((char *)Q + frame_size))
231   {
232   fprintf(f, "Frame %d type=%x subj=%lu code=%d back=%lu id=%d",
233     i, Q->group_frame_type, Q->eptr - mb->start_subject, *(Q->ecode),
234     Q->back_frame, Q->return_id);
235 
236   if (Q->last_group_offset == PCRE2_UNSET)
237     fprintf(f, " lgoffset=unset\n");
238   else
239     fprintf(f, " lgoffset=%lu\n",  Q->last_group_offset/frame_size);
240   }
241 }
242 
243 #endif
244 
245 
246 
247 /*************************************************
248 *                Process a callout               *
249 *************************************************/
250 
251 /* This function is called for all callouts, whether "standalone" or at the
252 start of a conditional group. Feptr will be pointing to either OP_CALLOUT or
253 OP_CALLOUT_STR. A callout block is allocated in pcre2_match() and initialized
254 with fixed values.
255 
256 Arguments:
257   F          points to the current backtracking frame
258   mb         points to the match block
259   lengthptr  where to return the length of the callout item
260 
261 Returns:     the return from the callout
262              or 0 if no callout function exists
263 */
264 
265 static int
do_callout(heapframe * F,match_block * mb,PCRE2_SIZE * lengthptr)266 do_callout(heapframe *F, match_block *mb, PCRE2_SIZE *lengthptr)
267 {
268 int rc;
269 PCRE2_SIZE save0, save1;
270 PCRE2_SIZE *callout_ovector;
271 pcre2_callout_block *cb;
272 
273 *lengthptr = (*Fecode == OP_CALLOUT)?
274   PRIV(OP_lengths)[OP_CALLOUT] : GET(Fecode, 1 + 2*LINK_SIZE);
275 
276 if (mb->callout == NULL) return 0;   /* No callout function provided */
277 
278 /* The original matching code (pre 10.30) worked directly with the ovector
279 passed by the user, and this was passed to callouts. Now that the working
280 ovector is in the backtracking frame, it no longer needs to reserve space for
281 the overall match offsets (which would waste space in the frame). For backward
282 compatibility, however, we pass capture_top and offset_vector to the callout as
283 if for the extended ovector, and we ensure that the first two slots are unset
284 by preserving and restoring their current contents. Picky compilers complain if
285 references such as Fovector[-2] are use directly, so we set up a separate
286 pointer. */
287 
288 callout_ovector = (PCRE2_SIZE *)(Fovector) - 2;
289 
290 /* The cb->version, cb->subject, cb->subject_length, and cb->start_match fields
291 are set externally. The first 3 never change; the last is updated for each
292 bumpalong. */
293 
294 cb = mb->cb;
295 cb->capture_top      = (uint32_t)Foffset_top/2 + 1;
296 cb->capture_last     = Fcapture_last;
297 cb->offset_vector    = callout_ovector;
298 cb->mark             = mb->nomatch_mark;
299 cb->current_position = (PCRE2_SIZE)(Feptr - mb->start_subject);
300 cb->pattern_position = GET(Fecode, 1);
301 cb->next_item_length = GET(Fecode, 1 + LINK_SIZE);
302 
303 if (*Fecode == OP_CALLOUT)  /* Numerical callout */
304   {
305   cb->callout_number = Fecode[1 + 2*LINK_SIZE];
306   cb->callout_string_offset = 0;
307   cb->callout_string = NULL;
308   cb->callout_string_length = 0;
309   }
310 else  /* String callout */
311   {
312   cb->callout_number = 0;
313   cb->callout_string_offset = GET(Fecode, 1 + 3*LINK_SIZE);
314   cb->callout_string = Fecode + (1 + 4*LINK_SIZE) + 1;
315   cb->callout_string_length =
316     *lengthptr - (1 + 4*LINK_SIZE) - 2;
317   }
318 
319 save0 = callout_ovector[0];
320 save1 = callout_ovector[1];
321 callout_ovector[0] = callout_ovector[1] = PCRE2_UNSET;
322 rc = mb->callout(cb, mb->callout_data);
323 callout_ovector[0] = save0;
324 callout_ovector[1] = save1;
325 cb->callout_flags = 0;
326 return rc;
327 }
328 
329 
330 
331 /*************************************************
332 *          Match a back-reference                *
333 *************************************************/
334 
335 /* This function is called only when it is known that the offset lies within
336 the offsets that have so far been used in the match. Note that in caseless
337 UTF-8 mode, the number of subject bytes matched may be different to the number
338 of reference bytes. (In theory this could also happen in UTF-16 mode, but it
339 seems unlikely.)
340 
341 Arguments:
342   offset      index into the offset vector
343   caseless    TRUE if caseless
344   F           the current backtracking frame pointer
345   mb          points to match block
346   lengthptr   pointer for returning the length matched
347 
348 Returns:      = 0 sucessful match; number of code units matched is set
349               < 0 no match
350               > 0 partial match
351 */
352 
353 static int
match_ref(PCRE2_SIZE offset,BOOL caseless,heapframe * F,match_block * mb,PCRE2_SIZE * lengthptr)354 match_ref(PCRE2_SIZE offset, BOOL caseless, heapframe *F, match_block *mb,
355   PCRE2_SIZE *lengthptr)
356 {
357 PCRE2_SPTR p;
358 PCRE2_SIZE length;
359 PCRE2_SPTR eptr;
360 PCRE2_SPTR eptr_start;
361 
362 /* Deal with an unset group. The default is no match, but there is an option to
363 match an empty string. */
364 
365 if (offset >= Foffset_top || Fovector[offset] == PCRE2_UNSET)
366   {
367   if ((mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
368     {
369     *lengthptr = 0;
370     return 0;      /* Match */
371     }
372   else return -1;  /* No match */
373   }
374 
375 /* Separate the caseless and UTF cases for speed. */
376 
377 eptr = eptr_start = Feptr;
378 p = mb->start_subject + Fovector[offset];
379 length = Fovector[offset+1] - Fovector[offset];
380 
381 if (caseless)
382   {
383 #if defined SUPPORT_UNICODE
384   BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
385 
386   if (utf || (mb->poptions & PCRE2_UCP) != 0)
387     {
388     PCRE2_SPTR endptr = p + length;
389 
390     /* Match characters up to the end of the reference. NOTE: the number of
391     code units matched may differ, because in UTF-8 there are some characters
392     whose upper and lower case codes have different numbers of bytes. For
393     example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65 (3
394     bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
395     sequence of two of the latter. It is important, therefore, to check the
396     length along the reference, not along the subject (earlier code did this
397     wrong). UCP without uses Unicode properties but without UTF encoding. */
398 
399     while (p < endptr)
400       {
401       uint32_t c, d;
402       const ucd_record *ur;
403       if (eptr >= mb->end_subject) return 1;   /* Partial match */
404 
405       if (utf)
406         {
407         GETCHARINC(c, eptr);
408         GETCHARINC(d, p);
409         }
410       else
411         {
412         c = *eptr++;
413         d = *p++;
414         }
415 
416       ur = GET_UCD(d);
417       if (c != d && c != (uint32_t)((int)d + ur->other_case))
418         {
419         const uint32_t *pp = PRIV(ucd_caseless_sets) + ur->caseset;
420         for (;;)
421           {
422           if (c < *pp) return -1;  /* No match */
423           if (c == *pp++) break;
424           }
425         }
426       }
427     }
428   else
429 #endif
430 
431   /* Not in UTF or UCP mode */
432     {
433     for (; length > 0; length--)
434       {
435       uint32_t cc, cp;
436       if (eptr >= mb->end_subject) return 1;   /* Partial match */
437       cc = UCHAR21TEST(eptr);
438       cp = UCHAR21TEST(p);
439       if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc))
440         return -1;  /* No match */
441       p++;
442       eptr++;
443       }
444     }
445   }
446 
447 /* In the caseful case, we can just compare the code units, whether or not we
448 are in UTF and/or UCP mode. When partial matching, we have to do this unit by
449 unit. */
450 
451 else
452   {
453   if (mb->partial != 0)
454     {
455     for (; length > 0; length--)
456       {
457       if (eptr >= mb->end_subject) return 1;   /* Partial match */
458       if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;  /* No match */
459       }
460     }
461 
462   /* Not partial matching */
463 
464   else
465     {
466     if ((PCRE2_SIZE)(mb->end_subject - eptr) < length) return 1; /* Partial */
467     if (memcmp(p, eptr, CU2BYTES(length)) != 0) return -1;  /* No match */
468     eptr += length;
469     }
470   }
471 
472 *lengthptr = eptr - eptr_start;
473 return 0;  /* Match */
474 }
475 
476 
477 
478 /******************************************************************************
479 *******************************************************************************
480                    "Recursion" in the match() function
481 
482 The original match() function was highly recursive, but this proved to be the
483 source of a number of problems over the years, mostly because of the relatively
484 small system stacks that are commonly found. As new features were added to
485 patterns, various kludges were invented to reduce the amount of stack used,
486 making the code hard to understand in places.
487 
488 A version did exist that used individual frames on the heap instead of calling
489 match() recursively, but this ran substantially slower. The current version is
490 a refactoring that uses a vector of frames to remember backtracking points.
491 This runs no slower, and possibly even a bit faster than the original recursive
492 implementation. An initial vector of size START_FRAMES_SIZE (enough for maybe
493 50 frames) is allocated on the system stack. If this is not big enough, the
494 heap is used for a larger vector.
495 
496 *******************************************************************************
497 ******************************************************************************/
498 
499 
500 
501 
502 /*************************************************
503 *       Macros for the match() function          *
504 *************************************************/
505 
/* These two macros package tests for partial matching that are needed many
times in the code.

SCHECK_PARTIAL() is for use when it is already known that the end of the
subject has been reached. If partial matching is requested, it sets the "hit
end" flag when either (a) the current pointer is past the earliest inspected
character (i.e. something has been matched, even if it is not part of the
actual matched string), or (b) mb->allowemptypartial is set, which covers
patterns that contain a lookbehind. These are the conditions under which
adding more characters might allow the current match to continue.

For hard partial matching (mb->partial greater than 1) a partial match is
returned immediately. Otherwise processing carries on, which means that a
complete match on the current subject is still sought, and a partial match is
returned only if no complete match can be found.

CHECK_PARTIAL() first tests whether the current pointer has reached the end
of the subject, and applies SCHECK_PARTIAL() only if it has. */

#define SCHECK_PARTIAL()\
  if (mb->partial != 0 && \
      (Feptr > mb->start_used_ptr || mb->allowemptypartial)) \
    { \
    mb->hitend = TRUE; \
    if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
    }

#define CHECK_PARTIAL()\
  if (Feptr >= mb->end_subject) \
    { \
    SCHECK_PARTIAL(); \
    }
531 
532 
/* These two macros implement backtracking. Between them they simulate a
recursive call to match() and the return from it, using the vector of frames
that remembers the backtracking points.

RMATCH(code, id) records the code position at which the "called" matching is
to start and the identity of this call in the current frame, and then jumps to
MATCH_RECURSE. It also plants the label L_<id>, which is where processing
resumes when the simulated call returns.

RRETURN(value) puts the value in rrc and jumps to RETURN_SWITCH. */

#define RMATCH(code,id)\
  {\
  start_ecode = code;\
  Freturn_id = id;\
  goto MATCH_RECURSE;\
  L_##id:;\
  }

#define RRETURN(value)\
  {\
  rrc = value;\
  goto RETURN_SWITCH;\
  }
550 
551 
552 
553 /*************************************************
554 *         Match from current position            *
555 *************************************************/
556 
557 /* This function is called to run one match attempt at a single starting point
558 in the subject.
559 
560 Performance note: It might be tempting to extract commonly used fields from the
561 mb structure (e.g. end_subject) into individual variables to improve
562 performance. Tests using gcc on a SPARC disproved this; in the first case, it
563 made performance worse.
564 
565 Arguments:
566    start_eptr   starting character in subject
567    start_ecode  starting position in compiled code
568    ovector      pointer to the final output vector
569    oveccount    number of pairs in ovector
570    top_bracket  number of capturing parentheses in the pattern
571    frame_size   size of each backtracking frame
572    mb           pointer to "static" variables block
573 
574 Returns:        MATCH_MATCH if matched            )  these values are >= 0
575                 MATCH_NOMATCH if failed to match  )
576                 negative MATCH_xxx value for PRUNE, SKIP, etc
577                 negative PCRE2_ERROR_xxx value if aborted by an error condition
578                 (e.g. stopped by repeated call or depth limit)
579 */
580 
581 static int
match(PCRE2_SPTR start_eptr,PCRE2_SPTR start_ecode,PCRE2_SIZE * ovector,uint16_t oveccount,uint16_t top_bracket,PCRE2_SIZE frame_size,match_block * mb)582 match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, PCRE2_SIZE *ovector,
583   uint16_t oveccount, uint16_t top_bracket, PCRE2_SIZE frame_size,
584   match_block *mb)
585 {
586 /* Frame-handling variables */
587 
588 heapframe *F;           /* Current frame pointer */
589 heapframe *N = NULL;    /* Temporary frame pointers */
590 heapframe *P = NULL;
591 heapframe *assert_accept_frame = NULL;  /* For passing back a frame with captures */
592 PCRE2_SIZE frame_copy_size;     /* Amount to copy when creating a new frame */
593 
594 /* Local variables that do not need to be preserved over calls to RRMATCH(). */
595 
596 PCRE2_SPTR bracode;     /* Temp pointer to start of group */
597 PCRE2_SIZE offset;      /* Used for group offsets */
598 PCRE2_SIZE length;      /* Used for various length calculations */
599 
600 int rrc;                /* Return from functions & backtracking "recursions" */
601 #ifdef SUPPORT_UNICODE
602 int proptype;           /* Type of character property */
603 #endif
604 
605 uint32_t i;             /* Used for local loops */
606 uint32_t fc;            /* Character values */
607 uint32_t number;        /* Used for group and other numbers */
608 uint32_t reptype = 0;   /* Type of repetition (0 to avoid compiler warning) */
609 uint32_t group_frame_type;  /* Specifies type for new group frames */
610 
611 BOOL condition;         /* Used in conditional groups */
612 BOOL cur_is_word;       /* Used in "word" tests */
613 BOOL prev_is_word;      /* Used in "word" tests */
614 
615 /* UTF and UCP flags */
616 
617 #ifdef SUPPORT_UNICODE
618 BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
619 BOOL ucp = (mb->poptions & PCRE2_UCP) != 0;
620 #else
621 BOOL utf = FALSE;  /* Required for convenience even when no Unicode support */
622 #endif
623 
624 /* This is the length of the last part of a backtracking frame that must be
625 copied when a new frame is created. */
626 
627 frame_copy_size = frame_size - offsetof(heapframe, eptr);
628 
629 /* Set up the first current frame at the start of the vector, and initialize
630 fields that are not reset for new frames. */
631 
632 F = mb->match_frames;
633 Frdepth = 0;                        /* "Recursion" depth */
634 Fcapture_last = 0;                  /* Number of most recent capture */
635 Fcurrent_recurse = RECURSE_UNSET;   /* Not pattern recursing. */
636 Fstart_match = Feptr = start_eptr;  /* Current data pointer and start match */
637 Fmark = NULL;                       /* Most recent mark */
638 Foffset_top = 0;                    /* End of captures within the frame */
639 Flast_group_offset = PCRE2_UNSET;   /* Saved frame of most recent group */
640 group_frame_type = 0;               /* Not a start of group frame */
641 goto NEW_FRAME;                     /* Start processing with this frame */
642 
643 /* Come back here when we want to create a new frame for remembering a
644 backtracking point. */
645 
646 MATCH_RECURSE:
647 
648 /* Set up a new backtracking frame. If the vector is full, get a new one
649 on the heap, doubling the size, but constrained by the heap limit. */
650 
651 N = (heapframe *)((char *)F + frame_size);
652 if (N >= mb->match_frames_top)
653   {
654   PCRE2_SIZE newsize = mb->frame_vector_size * 2;
655   heapframe *new;
656 
657   if ((newsize / 1024) > mb->heap_limit)
658     {
659     PCRE2_SIZE maxsize = ((mb->heap_limit * 1024)/frame_size) * frame_size;
660     if (mb->frame_vector_size >= maxsize) return PCRE2_ERROR_HEAPLIMIT;
661     newsize = maxsize;
662     }
663 
664   new = mb->memctl.malloc(newsize, mb->memctl.memory_data);
665   if (new == NULL) return PCRE2_ERROR_NOMEMORY;
666   memcpy(new, mb->match_frames, mb->frame_vector_size);
667 
668   F = (heapframe *)((char *)new + ((char *)F - (char *)mb->match_frames));
669   N = (heapframe *)((char *)F + frame_size);
670 
671   if (mb->match_frames != mb->stack_frames)
672     mb->memctl.free(mb->match_frames, mb->memctl.memory_data);
673   mb->match_frames = new;
674   mb->match_frames_top = (heapframe *)((char *)mb->match_frames + newsize);
675   mb->frame_vector_size = newsize;
676   }
677 
678 #ifdef DEBUG_SHOW_RMATCH
679 fprintf(stderr, "++ RMATCH %2d frame=%d", Freturn_id, Frdepth + 1);
680 if (group_frame_type != 0)
681   {
682   fprintf(stderr, " type=%x ", group_frame_type);
683   switch (GF_IDMASK(group_frame_type))
684     {
685     case GF_CAPTURE:
686     fprintf(stderr, "capture=%d", GF_DATAMASK(group_frame_type));
687     break;
688 
689     case GF_NOCAPTURE:
690     fprintf(stderr, "nocapture op=%d", GF_DATAMASK(group_frame_type));
691     break;
692 
693     case GF_CONDASSERT:
694     fprintf(stderr, "condassert op=%d", GF_DATAMASK(group_frame_type));
695     break;
696 
697     case GF_RECURSE:
698     fprintf(stderr, "recurse=%d", GF_DATAMASK(group_frame_type));
699     break;
700 
701     default:
702     fprintf(stderr, "*** unknown ***");
703     break;
704     }
705   }
706 fprintf(stderr, "\n");
707 #endif
708 
709 /* Copy those fields that must be copied into the new frame, increase the
710 "recursion" depth (i.e. the new frame's index) and then make the new frame
711 current. */
712 
713 memcpy((char *)N + offsetof(heapframe, eptr),
714        (char *)F + offsetof(heapframe, eptr),
715        frame_copy_size);
716 
717 N->rdepth = Frdepth + 1;
718 F = N;
719 
720 /* Carry on processing with a new frame. */
721 
722 NEW_FRAME:
723 Fgroup_frame_type = group_frame_type;
724 Fecode = start_ecode;      /* Starting code pointer */
725 Fback_frame = frame_size;  /* Default is go back one frame */
726 
727 /* If this is a special type of group frame, remember its offset for quick
728 access at the end of the group. If this is a recursion, set a new current
729 recursion value. */
730 
731 if (group_frame_type != 0)
732   {
733   Flast_group_offset = (char *)F - (char *)mb->match_frames;
734   if (GF_IDMASK(group_frame_type) == GF_RECURSE)
735     Fcurrent_recurse = GF_DATAMASK(group_frame_type);
736   group_frame_type = 0;
737   }
738 
739 
740 /* ========================================================================= */
741 /* This is the main processing loop. First check that we haven't recorded too
742 many backtracks (search tree is too large), or that we haven't exceeded the
743 recursive depth limit (used too many backtracking frames). If not, process the
744 opcodes. */
745 
746 if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT;
747 if (Frdepth >= mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT;
748 
749 for (;;)
750   {
751 #ifdef DEBUG_SHOW_OPS
752 fprintf(stderr, "++ op=%d\n", *Fecode);
753 #endif
754 
755   Fop = (uint8_t)(*Fecode);  /* Cast needed for 16-bit and 32-bit modes */
756   switch(Fop)
757     {
758     /* ===================================================================== */
759     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes, to close
760     any currently open capturing brackets. Unlike reaching the end of a group,
761     where we know the starting frame is at the top of the chained frames, in
762     this case we have to search back for the relevant frame in case other types
763     of group that use chained frames have intervened. Multiple OP_CLOSEs always
764     come innermost first, which matches the chain order. We can ignore this in
765     a recursion, because captures are not passed out of recursions. */
766 
767     case OP_CLOSE:
768     if (Fcurrent_recurse == RECURSE_UNSET)
769       {
770       number = GET2(Fecode, 1);
771       offset = Flast_group_offset;
772       for(;;)
773         {
774         if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
775         N = (heapframe *)((char *)mb->match_frames + offset);
776         P = (heapframe *)((char *)N - frame_size);
777         if (N->group_frame_type == (GF_CAPTURE | number)) break;
778         offset = P->last_group_offset;
779         }
780       offset = (number << 1) - 2;
781       Fcapture_last = number;
782       Fovector[offset] = P->eptr - mb->start_subject;
783       Fovector[offset+1] = Feptr - mb->start_subject;
784       if (offset >= Foffset_top) Foffset_top = offset + 2;
785       }
786     Fecode += PRIV(OP_lengths)[*Fecode];
787     break;
788 
789 
790     /* ===================================================================== */
791     /* Real or forced end of the pattern, assertion, or recursion. In an
792     assertion ACCEPT, update the last used pointer and remember the current
793     frame so that the captures and mark can be fished out of it. */
794 
795     case OP_ASSERT_ACCEPT:
796     if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
797     assert_accept_frame = F;
798     RRETURN(MATCH_ACCEPT);
799 
800     /* If recursing, we have to find the most recent recursion. */
801 
802     case OP_ACCEPT:
803     case OP_END:
804 
805     /* Handle end of a recursion. */
806 
807     if (Fcurrent_recurse != RECURSE_UNSET)
808       {
809       offset = Flast_group_offset;
810       for(;;)
811         {
812         if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
813         N = (heapframe *)((char *)mb->match_frames + offset);
814         P = (heapframe *)((char *)N - frame_size);
815         if (GF_IDMASK(N->group_frame_type) == GF_RECURSE) break;
816         offset = P->last_group_offset;
817         }
818 
819       /* N is now the frame of the recursion; the previous frame is at the
820       OP_RECURSE position. Go back there, copying the current subject position
821       and mark, and move on past the OP_RECURSE. */
822 
823       P->eptr = Feptr;
824       P->mark = Fmark;
825       F = P;
826       Fecode += 1 + LINK_SIZE;
827       continue;
828       }
829 
830     /* Not a recursion. Fail for an empty string match if either PCRE2_NOTEMPTY
831     is set, or if PCRE2_NOTEMPTY_ATSTART is set and we have matched at the
832     start of the subject. In both cases, backtracking will then try other
833     alternatives, if any. */
834 
835     if (Feptr == Fstart_match &&
836          ((mb->moptions & PCRE2_NOTEMPTY) != 0 ||
837            ((mb->moptions & PCRE2_NOTEMPTY_ATSTART) != 0 &&
838              Fstart_match == mb->start_subject + mb->start_offset)))
839       RRETURN(MATCH_NOMATCH);
840 
841     /* Also fail if PCRE2_ENDANCHORED is set and the end of the match is not
842     the end of the subject. After (*ACCEPT) we fail the entire match (at this
843     position) but backtrack on reaching the end of the pattern. */
844 
845     if (Feptr < mb->end_subject &&
846         ((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0)
847       {
848       if (Fop == OP_END) RRETURN(MATCH_NOMATCH);
849       return MATCH_NOMATCH;
850       }
851 
852     /* We have a successful match of the whole pattern. Record the result and
853     then do a direct return from the function. If there is space in the offset
854     vector, set any pairs that follow the highest-numbered captured string but
855     are less than the number of capturing groups in the pattern to PCRE2_UNSET.
856     It is documented that this happens. "Gaps" are set to PCRE2_UNSET
857     dynamically. It is only those at the end that need setting here. */
858 
859     mb->end_match_ptr = Feptr;           /* Record where we ended */
860     mb->end_offset_top = Foffset_top;    /* and how many extracts were taken */
861     mb->mark = Fmark;                    /* and the last success mark */
862     if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
863 
864     ovector[0] = Fstart_match - mb->start_subject;
865     ovector[1] = Feptr - mb->start_subject;
866 
867     /* Set i to the smaller of the sizes of the external and frame ovectors. */
868 
869     i = 2 * ((top_bracket + 1 > oveccount)? oveccount : top_bracket + 1);
870     memcpy(ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
871     while (--i >= Foffset_top + 2) ovector[i] = PCRE2_UNSET;
872     return MATCH_MATCH;  /* Note: NOT RRETURN */
873 
874 
875     /*===================================================================== */
876     /* Match any single character type except newline; have to take care with
877     CRLF newlines and partial matching. */
878 
879     case OP_ANY:
880     if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
881     if (mb->partial != 0 &&
882         Feptr == mb->end_subject - 1 &&
883         NLBLOCK->nltype == NLTYPE_FIXED &&
884         NLBLOCK->nllen == 2 &&
885         UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
886       {
887       mb->hitend = TRUE;
888       if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
889       }
890     /* Fall through */
891 
892     /* Match any single character whatsoever. */
893 
894     case OP_ALLANY:
895     if (Feptr >= mb->end_subject)  /* DO NOT merge the Feptr++ here; it must */
896       {                            /* not be updated before SCHECK_PARTIAL. */
897       SCHECK_PARTIAL();
898       RRETURN(MATCH_NOMATCH);
899       }
900     Feptr++;
901 #ifdef SUPPORT_UNICODE
902     if (utf) ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
903 #endif
904     Fecode++;
905     break;
906 
907 
908     /* ===================================================================== */
909     /* Match a single code unit, even in UTF mode. This opcode really does
910     match any code unit, even newline. (It really should be called ANYCODEUNIT,
911     of course - the byte name is from pre-16 bit days.) */
912 
913     case OP_ANYBYTE:
914     if (Feptr >= mb->end_subject)   /* DO NOT merge the Feptr++ here; it must */
915       {                             /* not be updated before SCHECK_PARTIAL. */
916       SCHECK_PARTIAL();
917       RRETURN(MATCH_NOMATCH);
918       }
919     Feptr++;
920     Fecode++;
921     break;
922 
923 
924     /* ===================================================================== */
925     /* Match a single character, casefully */
926 
927     case OP_CHAR:
928 #ifdef SUPPORT_UNICODE
929     if (utf)
930       {
931       Flength = 1;
932       Fecode++;
933       GETCHARLEN(fc, Fecode, Flength);
934       if (Flength > (PCRE2_SIZE)(mb->end_subject - Feptr))
935         {
936         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
937         RRETURN(MATCH_NOMATCH);
938         }
939       for (; Flength > 0; Flength--)
940         {
941         if (*Fecode++ != UCHAR21INC(Feptr)) RRETURN(MATCH_NOMATCH);
942         }
943       }
944     else
945 #endif
946 
947     /* Not UTF mode */
948       {
949       if (mb->end_subject - Feptr < 1)
950         {
951         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
952         RRETURN(MATCH_NOMATCH);
953         }
954       if (Fecode[1] != *Feptr++) RRETURN(MATCH_NOMATCH);
955       Fecode += 2;
956       }
957     break;
958 
959 
960     /* ===================================================================== */
961     /* Match a single character, caselessly. If we are at the end of the
962     subject, give up immediately. We get here only when the pattern character
963     has at most one other case. Characters with more than two cases are coded
964     as OP_PROP with the pseudo-property PT_CLIST. */
965 
966     case OP_CHARI:
967     if (Feptr >= mb->end_subject)
968       {
969       SCHECK_PARTIAL();
970       RRETURN(MATCH_NOMATCH);
971       }
972 
973 #ifdef SUPPORT_UNICODE
974     if (utf)
975       {
976       Flength = 1;
977       Fecode++;
978       GETCHARLEN(fc, Fecode, Flength);
979 
980       /* If the pattern character's value is < 128, we know that its other case
981       (if any) is also < 128 (and therefore only one code unit long in all
982       code-unit widths), so we can use the fast lookup table. We checked above
983       that there is at least one character left in the subject. */
984 
985       if (fc < 128)
986         {
987         uint32_t cc = UCHAR21(Feptr);
988         if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
989         Fecode++;
990         Feptr++;
991         }
992 
993       /* Otherwise we must pick up the subject character and use Unicode
994       property support to test its other case. Note that we cannot use the
995       value of "Flength" to check for sufficient bytes left, because the other
996       case of the character may have more or fewer code units. */
997 
998       else
999         {
1000         uint32_t dc;
1001         GETCHARINC(dc, Feptr);
1002         Fecode += Flength;
1003         if (dc != fc && dc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
1004         }
1005       }
1006 
1007     /* If UCP is set without UTF we must do the same as above, but with one
1008     character per code unit. */
1009 
1010     else if (ucp)
1011       {
1012       uint32_t cc = UCHAR21(Feptr);
1013       fc = Fecode[1];
1014       if (fc < 128)
1015         {
1016         if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
1017         }
1018       else
1019         {
1020         if (cc != fc && cc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
1021         }
1022       Feptr++;
1023       Fecode += 2;
1024       }
1025 
1026     else
1027 #endif   /* SUPPORT_UNICODE */
1028 
1029     /* Not UTF or UCP mode; use the table for characters < 256. */
1030       {
1031       if (TABLE_GET(Fecode[1], mb->lcc, Fecode[1])
1032           != TABLE_GET(*Feptr, mb->lcc, *Feptr)) RRETURN(MATCH_NOMATCH);
1033       Feptr++;
1034       Fecode += 2;
1035       }
1036     break;
1037 
1038 
1039     /* ===================================================================== */
1040     /* Match not a single character. */
1041 
1042     case OP_NOT:
1043     case OP_NOTI:
1044     if (Feptr >= mb->end_subject)
1045       {
1046       SCHECK_PARTIAL();
1047       RRETURN(MATCH_NOMATCH);
1048       }
1049 
1050 #ifdef SUPPORT_UNICODE
1051     if (utf)
1052       {
1053       uint32_t ch;
1054       Fecode++;
1055       GETCHARINC(ch, Fecode);
1056       GETCHARINC(fc, Feptr);
1057       if (ch == fc)
1058         {
1059         RRETURN(MATCH_NOMATCH);  /* Caseful match */
1060         }
1061       else if (Fop == OP_NOTI)   /* If caseless */
1062         {
1063         if (ch > 127)
1064           ch = UCD_OTHERCASE(ch);
1065         else
1066           ch = (mb->fcc)[ch];
1067         if (ch == fc) RRETURN(MATCH_NOMATCH);
1068         }
1069       }
1070 
1071     /* UCP without UTF is as above, but with one character per code unit. */
1072 
1073     else if (ucp)
1074       {
1075       uint32_t ch;
1076       fc = UCHAR21INC(Feptr);
1077       ch = Fecode[1];
1078       Fecode += 2;
1079 
1080       if (ch == fc)
1081         {
1082         RRETURN(MATCH_NOMATCH);  /* Caseful match */
1083         }
1084       else if (Fop == OP_NOTI)   /* If caseless */
1085         {
1086         if (ch > 127)
1087           ch = UCD_OTHERCASE(ch);
1088         else
1089           ch = (mb->fcc)[ch];
1090         if (ch == fc) RRETURN(MATCH_NOMATCH);
1091         }
1092       }
1093 
1094     else
1095 #endif  /* SUPPORT_UNICODE */
1096 
1097     /* Neither UTF nor UCP is set */
1098 
1099       {
1100       uint32_t ch = Fecode[1];
1101       fc = UCHAR21INC(Feptr);
1102       if (ch == fc || (Fop == OP_NOTI && TABLE_GET(ch, mb->fcc, ch) == fc))
1103         RRETURN(MATCH_NOMATCH);
1104       Fecode += 2;
1105       }
1106     break;
1107 
1108 
1109     /* ===================================================================== */
1110     /* Match a single character repeatedly. */
1111 
1112 #define Loclength    F->temp_size
1113 #define Lstart_eptr  F->temp_sptr[0]
1114 #define Lcharptr     F->temp_sptr[1]
1115 #define Lmin         F->temp_32[0]
1116 #define Lmax         F->temp_32[1]
1117 #define Lc           F->temp_32[2]
1118 #define Loc          F->temp_32[3]
1119 
1120     case OP_EXACT:
1121     case OP_EXACTI:
1122     Lmin = Lmax = GET2(Fecode, 1);
1123     Fecode += 1 + IMM2_SIZE;
1124     goto REPEATCHAR;
1125 
1126     case OP_POSUPTO:
1127     case OP_POSUPTOI:
1128     reptype = REPTYPE_POS;
1129     Lmin = 0;
1130     Lmax = GET2(Fecode, 1);
1131     Fecode += 1 + IMM2_SIZE;
1132     goto REPEATCHAR;
1133 
1134     case OP_UPTO:
1135     case OP_UPTOI:
1136     reptype = REPTYPE_MAX;
1137     Lmin = 0;
1138     Lmax = GET2(Fecode, 1);
1139     Fecode += 1 + IMM2_SIZE;
1140     goto REPEATCHAR;
1141 
1142     case OP_MINUPTO:
1143     case OP_MINUPTOI:
1144     reptype = REPTYPE_MIN;
1145     Lmin = 0;
1146     Lmax = GET2(Fecode, 1);
1147     Fecode += 1 + IMM2_SIZE;
1148     goto REPEATCHAR;
1149 
1150     case OP_POSSTAR:
1151     case OP_POSSTARI:
1152     reptype = REPTYPE_POS;
1153     Lmin = 0;
1154     Lmax = UINT32_MAX;
1155     Fecode++;
1156     goto REPEATCHAR;
1157 
1158     case OP_POSPLUS:
1159     case OP_POSPLUSI:
1160     reptype = REPTYPE_POS;
1161     Lmin = 1;
1162     Lmax = UINT32_MAX;
1163     Fecode++;
1164     goto REPEATCHAR;
1165 
1166     case OP_POSQUERY:
1167     case OP_POSQUERYI:
1168     reptype = REPTYPE_POS;
1169     Lmin = 0;
1170     Lmax = 1;
1171     Fecode++;
1172     goto REPEATCHAR;
1173 
1174     case OP_STAR:
1175     case OP_STARI:
1176     case OP_MINSTAR:
1177     case OP_MINSTARI:
1178     case OP_PLUS:
1179     case OP_PLUSI:
1180     case OP_MINPLUS:
1181     case OP_MINPLUSI:
1182     case OP_QUERY:
1183     case OP_QUERYI:
1184     case OP_MINQUERY:
1185     case OP_MINQUERYI:
1186     fc = *Fecode++ - ((Fop < OP_STARI)? OP_STAR : OP_STARI);
1187     Lmin = rep_min[fc];
1188     Lmax = rep_max[fc];
1189     reptype = rep_typ[fc];
1190 
1191     /* Common code for all repeated single-character matches. We first check
1192     for the minimum number of characters. If the minimum equals the maximum, we
1193     are done. Otherwise, if minimizing, check the rest of the pattern for a
1194     match; if there isn't one, advance up to the maximum, one character at a
1195     time.
1196 
1197     If maximizing, advance up to the maximum number of matching characters,
1198     until Feptr is past the end of the maximum run. If possessive, we are
1199     then done (no backing up). Otherwise, match at this position; anything
1200     other than no match is immediately returned. For nomatch, back up one
1201     character (a whole character, via BACKCHAR, in the multi-code-unit UTF
1202     case) and try again, until we reach the first optional character
1203     position.
1204 
1205     The various UTF/non-UTF and caseful/caseless cases are handled separately,
1206     for speed. */
1207 
1208     REPEATCHAR:
1209 #ifdef SUPPORT_UNICODE
1210     if (utf)
1211       {
1212       Flength = 1;
1213       Lcharptr = Fecode;
1214       GETCHARLEN(fc, Fecode, Flength);
1215       Fecode += Flength;
1216 
1217       /* Handle multi-code-unit character matching, caseful and caseless. */
1218 
1219       if (Flength > 1)
1220         {
1221         uint32_t othercase;
1222 
1223         if (Fop >= OP_STARI &&     /* Caseless */
1224             (othercase = UCD_OTHERCASE(fc)) != fc)
1225           Loclength = PRIV(ord2utf)(othercase, Foccu);
1226         else Loclength = 0;
1227 
1228         for (i = 1; i <= Lmin; i++)
1229           {
1230           if (Feptr <= mb->end_subject - Flength &&
1231             memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1232           else if (Loclength > 0 &&
1233                    Feptr <= mb->end_subject - Loclength &&
1234                    memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1235             Feptr += Loclength;
1236           else
1237             {
1238             CHECK_PARTIAL();
1239             RRETURN(MATCH_NOMATCH);
1240             }
1241           }
1242 
1243         if (Lmin == Lmax) continue;
1244 
1245         if (reptype == REPTYPE_MIN)
1246           {
1247           for (;;)
1248             {
1249             RMATCH(Fecode, RM202);
1250             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1251             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1252             if (Feptr <= mb->end_subject - Flength &&
1253               memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1254             else if (Loclength > 0 &&
1255                      Feptr <= mb->end_subject - Loclength &&
1256                      memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1257               Feptr += Loclength;
1258             else
1259               {
1260               CHECK_PARTIAL();
1261               RRETURN(MATCH_NOMATCH);
1262               }
1263             }
1264           /* Control never gets here */
1265           }
1266 
1267         else  /* Maximize */
1268           {
1269           Lstart_eptr = Feptr;
1270           for (i = Lmin; i < Lmax; i++)
1271             {
1272             if (Feptr <= mb->end_subject - Flength &&
1273                 memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0)
1274               Feptr += Flength;
1275             else if (Loclength > 0 &&
1276                      Feptr <= mb->end_subject - Loclength &&
1277                      memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1278               Feptr += Loclength;
1279             else
1280               {
1281               CHECK_PARTIAL();
1282               break;
1283               }
1284             }
1285 
1286           /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1287           Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1288           go too far. */
1289 
1290           if (reptype != REPTYPE_POS) for(;;)
1291             {
1292             if (Feptr <= Lstart_eptr) break;
1293             RMATCH(Fecode, RM203);
1294             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1295             Feptr--;
1296             BACKCHAR(Feptr);
1297             }
1298           }
1299         break;   /* End of repeated wide character handling */
1300         }
1301 
1302       /* Length of UTF character is 1. Put it into the preserved variable and
1303       fall through to the non-UTF code. */
1304 
1305       Lc = fc;
1306       }
1307     else
1308 #endif  /* SUPPORT_UNICODE */
1309 
1310     /* When not in UTF mode, load a single-code-unit character. Then proceed as
1311     above, using Unicode casing if either UTF or UCP is set. */
1312 
1313     Lc = *Fecode++;
1314 
1315     /* Caseless comparison */
1316 
1317     if (Fop >= OP_STARI)
1318       {
1319 #if PCRE2_CODE_UNIT_WIDTH == 8
1320 #ifdef SUPPORT_UNICODE
1321       if (ucp && !utf && Lc > 127) Loc = UCD_OTHERCASE(Lc);
1322       else
1323 #endif  /* SUPPORT_UNICODE */
1324       /* Lc will be < 128 in UTF-8 mode. */
1325       Loc = mb->fcc[Lc];
1326 #else /* 16-bit & 32-bit */
1327 #ifdef SUPPORT_UNICODE
1328       if ((utf || ucp) && Lc > 127) Loc = UCD_OTHERCASE(Lc);
1329       else
1330 #endif  /* SUPPORT_UNICODE */
1331       Loc = TABLE_GET(Lc, mb->fcc, Lc);
1332 #endif  /* PCRE2_CODE_UNIT_WIDTH == 8 */
1333 
1334       for (i = 1; i <= Lmin; i++)
1335         {
1336         uint32_t cc;                 /* Faster than PCRE2_UCHAR */
1337         if (Feptr >= mb->end_subject)
1338           {
1339           SCHECK_PARTIAL();
1340           RRETURN(MATCH_NOMATCH);
1341           }
1342         cc = UCHAR21TEST(Feptr);
1343         if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1344         Feptr++;
1345         }
1346       if (Lmin == Lmax) continue;
1347 
1348       if (reptype == REPTYPE_MIN)
1349         {
1350         for (;;)
1351           {
1352           uint32_t cc;               /* Faster than PCRE2_UCHAR */
1353           RMATCH(Fecode, RM25);
1354           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1355           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1356           if (Feptr >= mb->end_subject)
1357             {
1358             SCHECK_PARTIAL();
1359             RRETURN(MATCH_NOMATCH);
1360             }
1361           cc = UCHAR21TEST(Feptr);
1362           if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1363           Feptr++;
1364           }
1365         /* Control never gets here */
1366         }
1367 
1368       else  /* Maximize */
1369         {
1370         Lstart_eptr = Feptr;
1371         for (i = Lmin; i < Lmax; i++)
1372           {
1373           uint32_t cc;               /* Faster than PCRE2_UCHAR */
1374           if (Feptr >= mb->end_subject)
1375             {
1376             SCHECK_PARTIAL();
1377             break;
1378             }
1379           cc = UCHAR21TEST(Feptr);
1380           if (Lc != cc && Loc != cc) break;
1381           Feptr++;
1382           }
1383         if (reptype != REPTYPE_POS) for (;;)
1384           {
1385           if (Feptr == Lstart_eptr) break;
1386           RMATCH(Fecode, RM26);
1387           Feptr--;
1388           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1389           }
1390         }
1391       }
1392 
1393     /* Caseful comparisons (includes all multi-byte characters) */
1394 
1395     else
1396       {
1397       for (i = 1; i <= Lmin; i++)
1398         {
1399         if (Feptr >= mb->end_subject)
1400           {
1401           SCHECK_PARTIAL();
1402           RRETURN(MATCH_NOMATCH);
1403           }
1404         if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1405         }
1406 
1407       if (Lmin == Lmax) continue;
1408 
1409       if (reptype == REPTYPE_MIN)
1410         {
1411         for (;;)
1412           {
1413           RMATCH(Fecode, RM27);
1414           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1415           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1416           if (Feptr >= mb->end_subject)
1417             {
1418             SCHECK_PARTIAL();
1419             RRETURN(MATCH_NOMATCH);
1420             }
1421           if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1422           }
1423         /* Control never gets here */
1424         }
1425       else  /* Maximize */
1426         {
1427         Lstart_eptr = Feptr;
1428         for (i = Lmin; i < Lmax; i++)
1429           {
1430           if (Feptr >= mb->end_subject)
1431             {
1432             SCHECK_PARTIAL();
1433             break;
1434             }
1435 
1436           if (Lc != UCHAR21TEST(Feptr)) break;
1437           Feptr++;
1438           }
1439 
1440         if (reptype != REPTYPE_POS) for (;;)
1441           {
1442           if (Feptr <= Lstart_eptr) break;
1443           RMATCH(Fecode, RM28);
1444           Feptr--;
1445           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1446           }
1447         }
1448       }
1449     break;
1450 
1451 #undef Loclength
1452 #undef Lstart_eptr
1453 #undef Lcharptr
1454 #undef Lmin
1455 #undef Lmax
1456 #undef Lc
1457 #undef Loc
1458 
1459 
1460     /* ===================================================================== */
1461     /* Match a negated single character repeatedly. This is almost a
1462     repeat of the code for a repeated single character, but I haven't found a
1463     nice way of commoning these up that doesn't require a test of the
1464     positive/negative option for each character match. Maybe that wouldn't add
1465     very much to the time taken, but character matching *is* what this is all
1466     about... */
1467 
1468 #define Lstart_eptr  F->temp_sptr[0]
1469 #define Lmin         F->temp_32[0]
1470 #define Lmax         F->temp_32[1]
1471 #define Lc           F->temp_32[2]
1472 #define Loc          F->temp_32[3]
1473 
1474     case OP_NOTEXACT:
1475     case OP_NOTEXACTI:
1476     Lmin = Lmax = GET2(Fecode, 1);
1477     Fecode += 1 + IMM2_SIZE;
1478     goto REPEATNOTCHAR;
1479 
1480     case OP_NOTUPTO:
1481     case OP_NOTUPTOI:
1482     Lmin = 0;
1483     Lmax = GET2(Fecode, 1);
1484     reptype = REPTYPE_MAX;
1485     Fecode += 1 + IMM2_SIZE;
1486     goto REPEATNOTCHAR;
1487 
1488     case OP_NOTMINUPTO:
1489     case OP_NOTMINUPTOI:
1490     Lmin = 0;
1491     Lmax = GET2(Fecode, 1);
1492     reptype = REPTYPE_MIN;
1493     Fecode += 1 + IMM2_SIZE;
1494     goto REPEATNOTCHAR;
1495 
1496     case OP_NOTPOSSTAR:
1497     case OP_NOTPOSSTARI:
1498     reptype = REPTYPE_POS;
1499     Lmin = 0;
1500     Lmax = UINT32_MAX;
1501     Fecode++;
1502     goto REPEATNOTCHAR;
1503 
1504     case OP_NOTPOSPLUS:
1505     case OP_NOTPOSPLUSI:
1506     reptype = REPTYPE_POS;
1507     Lmin = 1;
1508     Lmax = UINT32_MAX;
1509     Fecode++;
1510     goto REPEATNOTCHAR;
1511 
1512     case OP_NOTPOSQUERY:
1513     case OP_NOTPOSQUERYI:
1514     reptype = REPTYPE_POS;
1515     Lmin = 0;
1516     Lmax = 1;
1517     Fecode++;
1518     goto REPEATNOTCHAR;
1519 
1520     case OP_NOTPOSUPTO:
1521     case OP_NOTPOSUPTOI:
1522     reptype = REPTYPE_POS;
1523     Lmin = 0;
1524     Lmax = GET2(Fecode, 1);
1525     Fecode += 1 + IMM2_SIZE;
1526     goto REPEATNOTCHAR;
1527 
1528     case OP_NOTSTAR:
1529     case OP_NOTSTARI:
1530     case OP_NOTMINSTAR:
1531     case OP_NOTMINSTARI:
1532     case OP_NOTPLUS:
1533     case OP_NOTPLUSI:
1534     case OP_NOTMINPLUS:
1535     case OP_NOTMINPLUSI:
1536     case OP_NOTQUERY:
1537     case OP_NOTQUERYI:
1538     case OP_NOTMINQUERY:
1539     case OP_NOTMINQUERYI:
1540     fc = *Fecode++ - ((Fop >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
1541     Lmin = rep_min[fc];
1542     Lmax = rep_max[fc];
1543     reptype = rep_typ[fc];
1544 
1545     /* Common code for all repeated single-character non-matches. */
1546 
1547     REPEATNOTCHAR:
1548     GETCHARINCTEST(Lc, Fecode);
1549 
1550     /* The code is duplicated for the caseless and caseful cases, for speed,
1551     since matching characters is likely to be quite common. First, ensure the
1552     minimum number of matches are present. If Lmin = Lmax, we are done.
1553     Otherwise, if minimizing, keep trying the rest of the expression and
1554     advancing one matching character if failing, up to the maximum.
1555     Alternatively, if maximizing, find the maximum number of characters and
1556     work backwards. */
1557 
1558     if (Fop >= OP_NOTSTARI)     /* Caseless */
1559       {
1560 #ifdef SUPPORT_UNICODE
1561       if ((utf || ucp) && Lc > 127)
1562         Loc = UCD_OTHERCASE(Lc);
1563       else
1564 #endif /* SUPPORT_UNICODE */
1565 
1566       Loc = TABLE_GET(Lc, mb->fcc, Lc);  /* Other case from table */
1567 
1568 #ifdef SUPPORT_UNICODE
1569       if (utf)
1570         {
1571         uint32_t d;
1572         for (i = 1; i <= Lmin; i++)
1573           {
1574           if (Feptr >= mb->end_subject)
1575             {
1576             SCHECK_PARTIAL();
1577             RRETURN(MATCH_NOMATCH);
1578             }
1579           GETCHARINC(d, Feptr);
1580           if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1581           }
1582         }
1583       else
1584 #endif  /* SUPPORT_UNICODE */
1585 
1586       /* Not UTF mode */
1587         {
1588         for (i = 1; i <= Lmin; i++)
1589           {
1590           if (Feptr >= mb->end_subject)
1591             {
1592             SCHECK_PARTIAL();
1593             RRETURN(MATCH_NOMATCH);
1594             }
1595           if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1596           Feptr++;
1597           }
1598         }
1599 
1600       if (Lmin == Lmax) continue;  /* Finished for exact count */
1601 
1602       if (reptype == REPTYPE_MIN)
1603         {
1604 #ifdef SUPPORT_UNICODE
1605         if (utf)
1606           {
1607           uint32_t d;
1608           for (;;)
1609             {
1610             RMATCH(Fecode, RM204);
1611             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1612             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1613             if (Feptr >= mb->end_subject)
1614               {
1615               SCHECK_PARTIAL();
1616               RRETURN(MATCH_NOMATCH);
1617               }
1618             GETCHARINC(d, Feptr);
1619             if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1620             }
1621           }
1622         else
1623 #endif  /*SUPPORT_UNICODE */
1624 
1625         /* Not UTF mode */
1626           {
1627           for (;;)
1628             {
1629             RMATCH(Fecode, RM29);
1630             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1631             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1632             if (Feptr >= mb->end_subject)
1633               {
1634               SCHECK_PARTIAL();
1635               RRETURN(MATCH_NOMATCH);
1636               }
1637             if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1638             Feptr++;
1639             }
1640           }
1641         /* Control never gets here */
1642         }
1643 
1644       /* Maximize case */
1645 
1646       else
1647         {
1648         Lstart_eptr = Feptr;
1649 
1650 #ifdef SUPPORT_UNICODE
1651         if (utf)
1652           {
1653           uint32_t d;
1654           for (i = Lmin; i < Lmax; i++)
1655             {
1656             int len = 1;
1657             if (Feptr >= mb->end_subject)
1658               {
1659               SCHECK_PARTIAL();
1660               break;
1661               }
1662             GETCHARLEN(d, Feptr, len);
1663             if (Lc == d || Loc == d) break;
1664             Feptr += len;
1665             }
1666 
1667           /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1668           Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1669           go too far. */
1670 
1671           if (reptype != REPTYPE_POS) for(;;)
1672             {
1673             if (Feptr <= Lstart_eptr) break;
1674             RMATCH(Fecode, RM205);
1675             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1676             Feptr--;
1677             BACKCHAR(Feptr);
1678             }
1679           }
1680         else
1681 #endif  /* SUPPORT_UNICODE */
1682 
1683         /* Not UTF mode */
1684           {
1685           for (i = Lmin; i < Lmax; i++)
1686             {
1687             if (Feptr >= mb->end_subject)
1688               {
1689               SCHECK_PARTIAL();
1690               break;
1691               }
1692             if (Lc == *Feptr || Loc == *Feptr) break;
1693             Feptr++;
1694             }
1695           if (reptype != REPTYPE_POS) for (;;)
1696             {
1697             if (Feptr == Lstart_eptr) break;
1698             RMATCH(Fecode, RM30);
1699             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1700             Feptr--;
1701             }
1702           }
1703         }
1704       }
1705 
1706     /* Caseful comparisons */
1707 
1708     else
1709       {
1710 #ifdef SUPPORT_UNICODE
1711       if (utf)
1712         {
1713         uint32_t d;
1714         for (i = 1; i <= Lmin; i++)
1715           {
1716           if (Feptr >= mb->end_subject)
1717             {
1718             SCHECK_PARTIAL();
1719             RRETURN(MATCH_NOMATCH);
1720             }
1721           GETCHARINC(d, Feptr);
1722           if (Lc == d) RRETURN(MATCH_NOMATCH);
1723           }
1724         }
1725       else
1726 #endif
1727       /* Not UTF mode */
1728         {
1729         for (i = 1; i <= Lmin; i++)
1730           {
1731           if (Feptr >= mb->end_subject)
1732             {
1733             SCHECK_PARTIAL();
1734             RRETURN(MATCH_NOMATCH);
1735             }
1736           if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1737           }
1738         }
1739 
1740       if (Lmin == Lmax) continue;
1741 
1742       if (reptype == REPTYPE_MIN)
1743         {
1744 #ifdef SUPPORT_UNICODE
1745         if (utf)
1746           {
1747           uint32_t d;
1748           for (;;)
1749             {
1750             RMATCH(Fecode, RM206);
1751             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1752             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1753             if (Feptr >= mb->end_subject)
1754               {
1755               SCHECK_PARTIAL();
1756               RRETURN(MATCH_NOMATCH);
1757               }
1758             GETCHARINC(d, Feptr);
1759             if (Lc == d) RRETURN(MATCH_NOMATCH);
1760             }
1761           }
1762         else
1763 #endif
1764         /* Not UTF mode */
1765           {
1766           for (;;)
1767             {
1768             RMATCH(Fecode, RM31);
1769             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1770             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1771             if (Feptr >= mb->end_subject)
1772               {
1773               SCHECK_PARTIAL();
1774               RRETURN(MATCH_NOMATCH);
1775               }
1776             if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1777             }
1778           }
1779         /* Control never gets here */
1780         }
1781 
1782       /* Maximize case */
1783 
1784       else
1785         {
1786         Lstart_eptr = Feptr;
1787 
1788 #ifdef SUPPORT_UNICODE
1789         if (utf)
1790           {
1791           uint32_t d;
1792           for (i = Lmin; i < Lmax; i++)
1793             {
1794             int len = 1;
1795             if (Feptr >= mb->end_subject)
1796               {
1797               SCHECK_PARTIAL();
1798               break;
1799               }
1800             GETCHARLEN(d, Feptr, len);
1801             if (Lc == d) break;
1802             Feptr += len;
1803             }
1804 
1805           /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1806           Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1807           go too far. */
1808 
1809           if (reptype != REPTYPE_POS) for(;;)
1810             {
1811             if (Feptr <= Lstart_eptr) break;
1812             RMATCH(Fecode, RM207);
1813             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1814             Feptr--;
1815             BACKCHAR(Feptr);
1816             }
1817           }
1818         else
1819 #endif
1820         /* Not UTF mode */
1821           {
1822           for (i = Lmin; i < Lmax; i++)
1823             {
1824             if (Feptr >= mb->end_subject)
1825               {
1826               SCHECK_PARTIAL();
1827               break;
1828               }
1829             if (Lc == *Feptr) break;
1830             Feptr++;
1831             }
1832           if (reptype != REPTYPE_POS) for (;;)
1833             {
1834             if (Feptr == Lstart_eptr) break;
1835             RMATCH(Fecode, RM32);
1836             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1837             Feptr--;
1838             }
1839           }
1840         }
1841       }
1842     break;
1843 
1844 #undef Lstart_eptr
1845 #undef Lmin
1846 #undef Lmax
1847 #undef Lc
1848 #undef Loc
1849 
1850 
1851     /* ===================================================================== */
1852     /* Match a bit-mapped character class, possibly repeatedly. These opcodes
1853     are used when all the characters in the class have values in the range
1854     0-255, and either the matching is caseful, or the characters are in the
1855     range 0-127 when UTF processing is enabled. The only difference between
1856     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1857     encountered. */
1858 
1859 #define Lmin               F->temp_32[0]
1860 #define Lmax               F->temp_32[1]
1861 #define Lstart_eptr        F->temp_sptr[0]
1862 #define Lbyte_map_address  F->temp_sptr[1]
1863 #define Lbyte_map          ((unsigned char *)Lbyte_map_address)
1864 
1865     case OP_NCLASS:
1866     case OP_CLASS:
1867       {
1868       Lbyte_map_address = Fecode + 1;           /* Save for matching */
1869       Fecode += 1 + (32 / sizeof(PCRE2_UCHAR)); /* Advance past the item */
1870 
1871       /* Look past the end of the item to see if there is repeat information
1872       following. Then obey similar code to character type repeats. */
1873 
1874       switch (*Fecode)
1875         {
1876         case OP_CRSTAR:
1877         case OP_CRMINSTAR:
1878         case OP_CRPLUS:
1879         case OP_CRMINPLUS:
1880         case OP_CRQUERY:
1881         case OP_CRMINQUERY:
1882         case OP_CRPOSSTAR:
1883         case OP_CRPOSPLUS:
1884         case OP_CRPOSQUERY:
1885         fc = *Fecode++ - OP_CRSTAR;
1886         Lmin = rep_min[fc];
1887         Lmax = rep_max[fc];
1888         reptype = rep_typ[fc];
1889         break;
1890 
1891         case OP_CRRANGE:
1892         case OP_CRMINRANGE:
1893         case OP_CRPOSRANGE:
1894         Lmin = GET2(Fecode, 1);
1895         Lmax = GET2(Fecode, 1 + IMM2_SIZE);
1896         if (Lmax == 0) Lmax = UINT32_MAX;       /* Max 0 => infinity */
1897         reptype = rep_typ[*Fecode - OP_CRSTAR];
1898         Fecode += 1 + 2 * IMM2_SIZE;
1899         break;
1900 
1901         default:               /* No repeat follows */
1902         Lmin = Lmax = 1;
1903         break;
1904         }
1905 
1906       /* First, ensure the minimum number of matches are present. */
1907 
1908 #ifdef SUPPORT_UNICODE
1909       if (utf)
1910         {
1911         for (i = 1; i <= Lmin; i++)
1912           {
1913           if (Feptr >= mb->end_subject)
1914             {
1915             SCHECK_PARTIAL();
1916             RRETURN(MATCH_NOMATCH);
1917             }
1918           GETCHARINC(fc, Feptr);
1919           if (fc > 255)
1920             {
1921             if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
1922             }
1923           else
1924             if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
1925           }
1926         }
1927       else
1928 #endif
1929       /* Not UTF mode */
1930         {
1931         for (i = 1; i <= Lmin; i++)
1932           {
1933           if (Feptr >= mb->end_subject)
1934             {
1935             SCHECK_PARTIAL();
1936             RRETURN(MATCH_NOMATCH);
1937             }
1938           fc = *Feptr++;
1939 #if PCRE2_CODE_UNIT_WIDTH != 8
1940           if (fc > 255)
1941             {
1942             if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
1943             }
1944           else
1945 #endif
1946           if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
1947           }
1948         }
1949 
1950       /* If Lmax == Lmin we are done. Continue with main loop. */
1951 
1952       if (Lmin == Lmax) continue;
1953 
1954       /* If minimizing, keep testing the rest of the expression and advancing
1955       the pointer while it matches the class. */
1956 
1957       if (reptype == REPTYPE_MIN)
1958         {
1959 #ifdef SUPPORT_UNICODE
1960         if (utf)
1961           {
1962           for (;;)
1963             {
1964             RMATCH(Fecode, RM200);
1965             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1966             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1967             if (Feptr >= mb->end_subject)
1968               {
1969               SCHECK_PARTIAL();
1970               RRETURN(MATCH_NOMATCH);
1971               }
1972             GETCHARINC(fc, Feptr);
1973             if (fc > 255)
1974               {
1975               if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
1976               }
1977             else
1978               if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
1979             }
1980           }
1981         else
1982 #endif
1983         /* Not UTF mode */
1984           {
1985           for (;;)
1986             {
1987             RMATCH(Fecode, RM23);
1988             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1989             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1990             if (Feptr >= mb->end_subject)
1991               {
1992               SCHECK_PARTIAL();
1993               RRETURN(MATCH_NOMATCH);
1994               }
1995             fc = *Feptr++;
1996 #if PCRE2_CODE_UNIT_WIDTH != 8
1997             if (fc > 255)
1998               {
1999               if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2000               }
2001             else
2002 #endif
2003             if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2004             }
2005           }
2006         /* Control never gets here */
2007         }
2008 
2009       /* If maximizing, find the longest possible run, then work backwards. */
2010 
2011       else
2012         {
2013         Lstart_eptr = Feptr;
2014 
2015 #ifdef SUPPORT_UNICODE
2016         if (utf)
2017           {
2018           for (i = Lmin; i < Lmax; i++)
2019             {
2020             int len = 1;
2021             if (Feptr >= mb->end_subject)
2022               {
2023               SCHECK_PARTIAL();
2024               break;
2025               }
2026             GETCHARLEN(fc, Feptr, len);
2027             if (fc > 255)
2028               {
2029               if (Fop == OP_CLASS) break;
2030               }
2031             else
2032               if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
2033             Feptr += len;
2034             }
2035 
2036           if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2037 
2038           /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2039           Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2040           go too far. */
2041 
2042           for (;;)
2043             {
2044             RMATCH(Fecode, RM201);
2045             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2046             if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2047             BACKCHAR(Feptr);
2048             }
2049           }
2050         else
2051 #endif
2052           /* Not UTF mode */
2053           {
2054           for (i = Lmin; i < Lmax; i++)
2055             {
2056             if (Feptr >= mb->end_subject)
2057               {
2058               SCHECK_PARTIAL();
2059               break;
2060               }
2061             fc = *Feptr;
2062 #if PCRE2_CODE_UNIT_WIDTH != 8
2063             if (fc > 255)
2064               {
2065               if (Fop == OP_CLASS) break;
2066               }
2067             else
2068 #endif
2069             if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
2070             Feptr++;
2071             }
2072 
2073           if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2074 
2075           while (Feptr >= Lstart_eptr)
2076             {
2077             RMATCH(Fecode, RM24);
2078             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2079             Feptr--;
2080             }
2081           }
2082 
2083         RRETURN(MATCH_NOMATCH);
2084         }
2085       }
2086     /* Control never gets here */
2087 
2088 #undef Lbyte_map_address
2089 #undef Lbyte_map
2090 #undef Lstart_eptr
2091 #undef Lmin
2092 #undef Lmax
2093 
2094 
2095     /* ===================================================================== */
2096     /* Match an extended character class. In the 8-bit library, this opcode is
2097     encountered only when UTF-8 mode is supported. In the 16-bit and
2098     32-bit libraries, codepoints greater than 255 may be encountered even when
2099     UTF is not supported. */
2100 
2101 #define Lstart_eptr  F->temp_sptr[0]
2102 #define Lxclass_data F->temp_sptr[1]
2103 #define Lmin         F->temp_32[0]
2104 #define Lmax         F->temp_32[1]
2105 
2106 #ifdef SUPPORT_WIDE_CHARS
2107     case OP_XCLASS:
2108       {
2109       Lxclass_data = Fecode + 1 + LINK_SIZE;  /* Save for matching */
2110       Fecode += GET(Fecode, 1);               /* Advance past the item */
2111 
2112       switch (*Fecode)
2113         {
2114         case OP_CRSTAR:
2115         case OP_CRMINSTAR:
2116         case OP_CRPLUS:
2117         case OP_CRMINPLUS:
2118         case OP_CRQUERY:
2119         case OP_CRMINQUERY:
2120         case OP_CRPOSSTAR:
2121         case OP_CRPOSPLUS:
2122         case OP_CRPOSQUERY:
2123         fc = *Fecode++ - OP_CRSTAR;
2124         Lmin = rep_min[fc];
2125         Lmax = rep_max[fc];
2126         reptype = rep_typ[fc];
2127         break;
2128 
2129         case OP_CRRANGE:
2130         case OP_CRMINRANGE:
2131         case OP_CRPOSRANGE:
2132         Lmin = GET2(Fecode, 1);
2133         Lmax = GET2(Fecode, 1 + IMM2_SIZE);
2134         if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
2135         reptype = rep_typ[*Fecode - OP_CRSTAR];
2136         Fecode += 1 + 2 * IMM2_SIZE;
2137         break;
2138 
2139         default:               /* No repeat follows */
2140         Lmin = Lmax = 1;
2141         break;
2142         }
2143 
2144       /* First, ensure the minimum number of matches are present. */
2145 
2146       for (i = 1; i <= Lmin; i++)
2147         {
2148         if (Feptr >= mb->end_subject)
2149           {
2150           SCHECK_PARTIAL();
2151           RRETURN(MATCH_NOMATCH);
2152           }
2153         GETCHARINCTEST(fc, Feptr);
2154         if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH);
2155         }
2156 
2157       /* If Lmax == Lmin we can just continue with the main loop. */
2158 
2159       if (Lmin == Lmax) continue;
2160 
2161       /* If minimizing, keep testing the rest of the expression and advancing
2162       the pointer while it matches the class. */
2163 
2164       if (reptype == REPTYPE_MIN)
2165         {
2166         for (;;)
2167           {
2168           RMATCH(Fecode, RM100);
2169           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2170           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2171           if (Feptr >= mb->end_subject)
2172             {
2173             SCHECK_PARTIAL();
2174             RRETURN(MATCH_NOMATCH);
2175             }
2176           GETCHARINCTEST(fc, Feptr);
2177           if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH);
2178           }
2179         /* Control never gets here */
2180         }
2181 
2182       /* If maximizing, find the longest possible run, then work backwards. */
2183 
2184       else
2185         {
2186         Lstart_eptr = Feptr;
2187         for (i = Lmin; i < Lmax; i++)
2188           {
2189           int len = 1;
2190           if (Feptr >= mb->end_subject)
2191             {
2192             SCHECK_PARTIAL();
2193             break;
2194             }
2195 #ifdef SUPPORT_UNICODE
2196           GETCHARLENTEST(fc, Feptr, len);
2197 #else
2198           fc = *Feptr;
2199 #endif
2200           if (!PRIV(xclass)(fc, Lxclass_data, utf)) break;
2201           Feptr += len;
2202           }
2203 
2204         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2205 
2206         /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2207         Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2208         go too far. */
2209 
2210         for(;;)
2211           {
2212           RMATCH(Fecode, RM101);
2213           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2214           if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2215 #ifdef SUPPORT_UNICODE
2216           if (utf) BACKCHAR(Feptr);
2217 #endif
2218           }
2219         RRETURN(MATCH_NOMATCH);
2220         }
2221 
2222       /* Control never gets here */
2223       }
2224 #endif  /* SUPPORT_WIDE_CHARS: end of XCLASS */
2225 
2226 #undef Lstart_eptr
2227 #undef Lxclass_data
2228 #undef Lmin
2229 #undef Lmax
2230 
2231 
2232     /* ===================================================================== */
2233     /* Match various character types when PCRE2_UCP is not set. These opcodes
2234     are not generated when PCRE2_UCP is set - instead appropriate property
2235     tests are compiled. */
2236 
2237     case OP_NOT_DIGIT:
2238     if (Feptr >= mb->end_subject)
2239       {
2240       SCHECK_PARTIAL();
2241       RRETURN(MATCH_NOMATCH);
2242       }
2243     GETCHARINCTEST(fc, Feptr);
2244     if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
2245       RRETURN(MATCH_NOMATCH);
2246     Fecode++;
2247     break;
2248 
2249     case OP_DIGIT:
2250     if (Feptr >= mb->end_subject)
2251       {
2252       SCHECK_PARTIAL();
2253       RRETURN(MATCH_NOMATCH);
2254       }
2255     GETCHARINCTEST(fc, Feptr);
2256     if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
2257       RRETURN(MATCH_NOMATCH);
2258     Fecode++;
2259     break;
2260 
2261     case OP_NOT_WHITESPACE:
2262     if (Feptr >= mb->end_subject)
2263       {
2264       SCHECK_PARTIAL();
2265       RRETURN(MATCH_NOMATCH);
2266       }
2267     GETCHARINCTEST(fc, Feptr);
2268     if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
2269       RRETURN(MATCH_NOMATCH);
2270     Fecode++;
2271     break;
2272 
2273     case OP_WHITESPACE:
2274     if (Feptr >= mb->end_subject)
2275       {
2276       SCHECK_PARTIAL();
2277       RRETURN(MATCH_NOMATCH);
2278       }
2279     GETCHARINCTEST(fc, Feptr);
2280     if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
2281       RRETURN(MATCH_NOMATCH);
2282     Fecode++;
2283     break;
2284 
2285     case OP_NOT_WORDCHAR:
2286     if (Feptr >= mb->end_subject)
2287       {
2288       SCHECK_PARTIAL();
2289       RRETURN(MATCH_NOMATCH);
2290       }
2291     GETCHARINCTEST(fc, Feptr);
2292     if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
2293       RRETURN(MATCH_NOMATCH);
2294     Fecode++;
2295     break;
2296 
2297     case OP_WORDCHAR:
2298     if (Feptr >= mb->end_subject)
2299       {
2300       SCHECK_PARTIAL();
2301       RRETURN(MATCH_NOMATCH);
2302       }
2303     GETCHARINCTEST(fc, Feptr);
2304     if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
2305       RRETURN(MATCH_NOMATCH);
2306     Fecode++;
2307     break;
2308 
2309     case OP_ANYNL:
2310     if (Feptr >= mb->end_subject)
2311       {
2312       SCHECK_PARTIAL();
2313       RRETURN(MATCH_NOMATCH);
2314       }
2315     GETCHARINCTEST(fc, Feptr);
2316     switch(fc)
2317       {
2318       default: RRETURN(MATCH_NOMATCH);
2319 
2320       case CHAR_CR:
2321       if (Feptr >= mb->end_subject)
2322         {
2323         SCHECK_PARTIAL();
2324         }
2325       else if (UCHAR21TEST(Feptr) == CHAR_LF) Feptr++;
2326       break;
2327 
2328       case CHAR_LF:
2329       break;
2330 
2331       case CHAR_VT:
2332       case CHAR_FF:
2333       case CHAR_NEL:
2334 #ifndef EBCDIC
2335       case 0x2028:
2336       case 0x2029:
2337 #endif  /* Not EBCDIC */
2338       if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
2339       break;
2340       }
2341     Fecode++;
2342     break;
2343 
2344     case OP_NOT_HSPACE:
2345     if (Feptr >= mb->end_subject)
2346       {
2347       SCHECK_PARTIAL();
2348       RRETURN(MATCH_NOMATCH);
2349       }
2350     GETCHARINCTEST(fc, Feptr);
2351     switch(fc)
2352       {
2353       HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2354       default: break;
2355       }
2356     Fecode++;
2357     break;
2358 
2359     case OP_HSPACE:
2360     if (Feptr >= mb->end_subject)
2361       {
2362       SCHECK_PARTIAL();
2363       RRETURN(MATCH_NOMATCH);
2364       }
2365     GETCHARINCTEST(fc, Feptr);
2366     switch(fc)
2367       {
2368       HSPACE_CASES: break;  /* Byte and multibyte cases */
2369       default: RRETURN(MATCH_NOMATCH);
2370       }
2371     Fecode++;
2372     break;
2373 
2374     case OP_NOT_VSPACE:
2375     if (Feptr >= mb->end_subject)
2376       {
2377       SCHECK_PARTIAL();
2378       RRETURN(MATCH_NOMATCH);
2379       }
2380     GETCHARINCTEST(fc, Feptr);
2381     switch(fc)
2382       {
2383       VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2384       default: break;
2385       }
2386     Fecode++;
2387     break;
2388 
2389     case OP_VSPACE:
2390     if (Feptr >= mb->end_subject)
2391       {
2392       SCHECK_PARTIAL();
2393       RRETURN(MATCH_NOMATCH);
2394       }
2395     GETCHARINCTEST(fc, Feptr);
2396     switch(fc)
2397       {
2398       VSPACE_CASES: break;
2399       default: RRETURN(MATCH_NOMATCH);
2400       }
2401     Fecode++;
2402     break;
2403 
2404 
2405 #ifdef SUPPORT_UNICODE
2406 
2407     /* ===================================================================== */
2408     /* Check the next character by Unicode property. We will get here only
2409     if the support is in the binary; otherwise a compile-time error occurs. */
2410 
2411     case OP_PROP:
2412     case OP_NOTPROP:
2413     if (Feptr >= mb->end_subject)
2414       {
2415       SCHECK_PARTIAL();
2416       RRETURN(MATCH_NOMATCH);
2417       }
2418     GETCHARINCTEST(fc, Feptr);
2419       {
2420       const uint32_t *cp;
2421       const ucd_record *prop = GET_UCD(fc);
2422 
2423       switch(Fecode[1])
2424         {
2425         case PT_ANY:
2426         if (Fop == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2427         break;
2428 
2429         case PT_LAMP:
2430         if ((prop->chartype == ucp_Lu ||
2431              prop->chartype == ucp_Ll ||
2432              prop->chartype == ucp_Lt) == (Fop == OP_NOTPROP))
2433           RRETURN(MATCH_NOMATCH);
2434         break;
2435 
2436         case PT_GC:
2437         if ((Fecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (Fop == OP_PROP))
2438           RRETURN(MATCH_NOMATCH);
2439         break;
2440 
2441         case PT_PC:
2442         if ((Fecode[2] != prop->chartype) == (Fop == OP_PROP))
2443           RRETURN(MATCH_NOMATCH);
2444         break;
2445 
2446         case PT_SC:
2447         if ((Fecode[2] != prop->script) == (Fop == OP_PROP))
2448           RRETURN(MATCH_NOMATCH);
2449         break;
2450 
2451         /* These are specials */
2452 
2453         case PT_ALNUM:
2454         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2455              PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (Fop == OP_NOTPROP))
2456           RRETURN(MATCH_NOMATCH);
2457         break;
2458 
2459         /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2460         which means that Perl space and POSIX space are now identical. PCRE
2461         was changed at release 8.34. */
2462 
2463         case PT_SPACE:    /* Perl space */
2464         case PT_PXSPACE:  /* POSIX space */
2465         switch(fc)
2466           {
2467           HSPACE_CASES:
2468           VSPACE_CASES:
2469           if (Fop == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2470           break;
2471 
2472           default:
2473           if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
2474             (Fop == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
2475           break;
2476           }
2477         break;
2478 
2479         case PT_WORD:
2480         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2481              PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2482              fc == CHAR_UNDERSCORE) == (Fop == OP_NOTPROP))
2483           RRETURN(MATCH_NOMATCH);
2484         break;
2485 
2486         case PT_CLIST:
2487         cp = PRIV(ucd_caseless_sets) + Fecode[2];
2488         for (;;)
2489           {
2490           if (fc < *cp)
2491             { if (Fop == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
2492           if (fc == *cp++)
2493             { if (Fop == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
2494           }
2495         break;
2496 
2497         case PT_UCNC:
2498         if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
2499              fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
2500              fc >= 0xe000) == (Fop == OP_NOTPROP))
2501           RRETURN(MATCH_NOMATCH);
2502         break;
2503 
2504         /* This should never occur */
2505 
2506         default:
2507         return PCRE2_ERROR_INTERNAL;
2508         }
2509 
2510       Fecode += 3;
2511       }
2512     break;
2513 
2514 
2515     /* ===================================================================== */
2516     /* Match an extended Unicode sequence. We will get here only if the support
2517     is in the binary; otherwise a compile-time error occurs. */
2518 
2519     case OP_EXTUNI:
2520     if (Feptr >= mb->end_subject)
2521       {
2522       SCHECK_PARTIAL();
2523       RRETURN(MATCH_NOMATCH);
2524       }
2525     else
2526       {
2527       GETCHARINCTEST(fc, Feptr);
2528       Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject, utf,
2529         NULL);
2530       }
2531     CHECK_PARTIAL();
2532     Fecode++;
2533     break;
2534 
2535 #endif  /* SUPPORT_UNICODE */
2536 
2537 
2538     /* ===================================================================== */
2539     /* Match a single character type repeatedly. Note that the property type
2540     does not need to be in a stack frame as it is not used within an RMATCH()
2541     loop. */
2542 
2543 #define Lstart_eptr  F->temp_sptr[0]
2544 #define Lmin         F->temp_32[0]
2545 #define Lmax         F->temp_32[1]
2546 #define Lctype       F->temp_32[2]
2547 #define Lpropvalue   F->temp_32[3]
2548 
2549     case OP_TYPEEXACT:
2550     Lmin = Lmax = GET2(Fecode, 1);
2551     Fecode += 1 + IMM2_SIZE;
2552     goto REPEATTYPE;
2553 
2554     case OP_TYPEUPTO:
2555     case OP_TYPEMINUPTO:
2556     Lmin = 0;
2557     Lmax = GET2(Fecode, 1);
2558     reptype = (*Fecode == OP_TYPEMINUPTO)? REPTYPE_MIN : REPTYPE_MAX;
2559     Fecode += 1 + IMM2_SIZE;
2560     goto REPEATTYPE;
2561 
2562     case OP_TYPEPOSSTAR:
2563     reptype = REPTYPE_POS;
2564     Lmin = 0;
2565     Lmax = UINT32_MAX;
2566     Fecode++;
2567     goto REPEATTYPE;
2568 
2569     case OP_TYPEPOSPLUS:
2570     reptype = REPTYPE_POS;
2571     Lmin = 1;
2572     Lmax = UINT32_MAX;
2573     Fecode++;
2574     goto REPEATTYPE;
2575 
2576     case OP_TYPEPOSQUERY:
2577     reptype = REPTYPE_POS;
2578     Lmin = 0;
2579     Lmax = 1;
2580     Fecode++;
2581     goto REPEATTYPE;
2582 
2583     case OP_TYPEPOSUPTO:
2584     reptype = REPTYPE_POS;
2585     Lmin = 0;
2586     Lmax = GET2(Fecode, 1);
2587     Fecode += 1 + IMM2_SIZE;
2588     goto REPEATTYPE;
2589 
2590     case OP_TYPESTAR:
2591     case OP_TYPEMINSTAR:
2592     case OP_TYPEPLUS:
2593     case OP_TYPEMINPLUS:
2594     case OP_TYPEQUERY:
2595     case OP_TYPEMINQUERY:
2596     fc = *Fecode++ - OP_TYPESTAR;
2597     Lmin = rep_min[fc];
2598     Lmax = rep_max[fc];
2599     reptype = rep_typ[fc];
2600 
2601     /* Common code for all repeated character type matches. */
2602 
2603     REPEATTYPE:
2604     Lctype = *Fecode++;      /* Code for the character type */
2605 
2606 #ifdef SUPPORT_UNICODE
2607     if (Lctype == OP_PROP || Lctype == OP_NOTPROP)
2608       {
2609       proptype = *Fecode++;
2610       Lpropvalue = *Fecode++;
2611       }
2612     else proptype = -1;
2613 #endif
2614 
2615     /* First, ensure the minimum number of matches are present. Use inline
2616     code for maximizing the speed, and do the type test once at the start
2617     (i.e. keep it out of the loop). The code for UTF mode is separated out for
2618     tidiness, except for Unicode property tests. */
2619 
2620     if (Lmin > 0)
2621       {
2622 #ifdef SUPPORT_UNICODE
2623       if (proptype >= 0)  /* Property tests in all modes */
2624         {
2625         switch(proptype)
2626           {
2627           case PT_ANY:
2628           if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2629           for (i = 1; i <= Lmin; i++)
2630             {
2631             if (Feptr >= mb->end_subject)
2632               {
2633               SCHECK_PARTIAL();
2634               RRETURN(MATCH_NOMATCH);
2635               }
2636             GETCHARINCTEST(fc, Feptr);
2637             }
2638           break;
2639 
2640           case PT_LAMP:
2641           for (i = 1; i <= Lmin; i++)
2642             {
2643             int chartype;
2644             if (Feptr >= mb->end_subject)
2645               {
2646               SCHECK_PARTIAL();
2647               RRETURN(MATCH_NOMATCH);
2648               }
2649             GETCHARINCTEST(fc, Feptr);
2650             chartype = UCD_CHARTYPE(fc);
2651             if ((chartype == ucp_Lu ||
2652                  chartype == ucp_Ll ||
2653                  chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
2654               RRETURN(MATCH_NOMATCH);
2655             }
2656           break;
2657 
2658           case PT_GC:
2659           for (i = 1; i <= Lmin; i++)
2660             {
2661             if (Feptr >= mb->end_subject)
2662               {
2663               SCHECK_PARTIAL();
2664               RRETURN(MATCH_NOMATCH);
2665               }
2666             GETCHARINCTEST(fc, Feptr);
2667             if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
2668               RRETURN(MATCH_NOMATCH);
2669             }
2670           break;
2671 
2672           case PT_PC:
2673           for (i = 1; i <= Lmin; i++)
2674             {
2675             if (Feptr >= mb->end_subject)
2676               {
2677               SCHECK_PARTIAL();
2678               RRETURN(MATCH_NOMATCH);
2679               }
2680             GETCHARINCTEST(fc, Feptr);
2681             if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
2682               RRETURN(MATCH_NOMATCH);
2683             }
2684           break;
2685 
2686           case PT_SC:
2687           for (i = 1; i <= Lmin; i++)
2688             {
2689             if (Feptr >= mb->end_subject)
2690               {
2691               SCHECK_PARTIAL();
2692               RRETURN(MATCH_NOMATCH);
2693               }
2694             GETCHARINCTEST(fc, Feptr);
2695             if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
2696               RRETURN(MATCH_NOMATCH);
2697             }
2698           break;
2699 
2700           case PT_ALNUM:
2701           for (i = 1; i <= Lmin; i++)
2702             {
2703             int category;
2704             if (Feptr >= mb->end_subject)
2705               {
2706               SCHECK_PARTIAL();
2707               RRETURN(MATCH_NOMATCH);
2708               }
2709             GETCHARINCTEST(fc, Feptr);
2710             category = UCD_CATEGORY(fc);
2711             if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP))
2712               RRETURN(MATCH_NOMATCH);
2713             }
2714           break;
2715 
2716           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2717           which means that Perl space and POSIX space are now identical. PCRE
2718           was changed at release 8.34. */
2719 
2720           case PT_SPACE:    /* Perl space */
2721           case PT_PXSPACE:  /* POSIX space */
2722           for (i = 1; i <= Lmin; i++)
2723             {
2724             if (Feptr >= mb->end_subject)
2725               {
2726               SCHECK_PARTIAL();
2727               RRETURN(MATCH_NOMATCH);
2728               }
2729             GETCHARINCTEST(fc, Feptr);
2730             switch(fc)
2731               {
2732               HSPACE_CASES:
2733               VSPACE_CASES:
2734               if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2735               break;
2736 
2737               default:
2738               if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
2739                 RRETURN(MATCH_NOMATCH);
2740               break;
2741               }
2742             }
2743           break;
2744 
2745           case PT_WORD:
2746           for (i = 1; i <= Lmin; i++)
2747             {
2748             int category;
2749             if (Feptr >= mb->end_subject)
2750               {
2751               SCHECK_PARTIAL();
2752               RRETURN(MATCH_NOMATCH);
2753               }
2754             GETCHARINCTEST(fc, Feptr);
2755             category = UCD_CATEGORY(fc);
2756             if ((category == ucp_L || category == ucp_N ||
2757                 fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
2758               RRETURN(MATCH_NOMATCH);
2759             }
2760           break;
2761 
2762           case PT_CLIST:
2763           for (i = 1; i <= Lmin; i++)
2764             {
2765             const uint32_t *cp;
2766             if (Feptr >= mb->end_subject)
2767               {
2768               SCHECK_PARTIAL();
2769               RRETURN(MATCH_NOMATCH);
2770               }
2771             GETCHARINCTEST(fc, Feptr);
2772             cp = PRIV(ucd_caseless_sets) + Lpropvalue;
2773             for (;;)
2774               {
2775               if (fc < *cp)
2776                 {
2777                 if (Lctype == OP_NOTPROP) break;
2778                 RRETURN(MATCH_NOMATCH);
2779                 }
2780               if (fc == *cp++)
2781                 {
2782                 if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2783                 break;
2784                 }
2785               }
2786             }
2787           break;
2788 
2789           case PT_UCNC:
2790           for (i = 1; i <= Lmin; i++)
2791             {
2792             if (Feptr >= mb->end_subject)
2793               {
2794               SCHECK_PARTIAL();
2795               RRETURN(MATCH_NOMATCH);
2796               }
2797             GETCHARINCTEST(fc, Feptr);
2798             if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
2799                  fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
2800                  fc >= 0xe000) == (Lctype == OP_NOTPROP))
2801               RRETURN(MATCH_NOMATCH);
2802             }
2803           break;
2804 
2805           /* This should not occur */
2806 
2807           default:
2808           return PCRE2_ERROR_INTERNAL;
2809           }
2810         }
2811 
2812       /* Match extended Unicode sequences. We will get here only if the
2813       support is in the binary; otherwise a compile-time error occurs. */
2814 
2815       else if (Lctype == OP_EXTUNI)
2816         {
2817         for (i = 1; i <= Lmin; i++)
2818           {
2819           if (Feptr >= mb->end_subject)
2820             {
2821             SCHECK_PARTIAL();
2822             RRETURN(MATCH_NOMATCH);
2823             }
2824           else
2825             {
2826             GETCHARINCTEST(fc, Feptr);
2827             Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject,
2828               mb->end_subject, utf, NULL);
2829             }
2830           CHECK_PARTIAL();
2831           }
2832         }
2833       else
2834 #endif     /* SUPPORT_UNICODE */
2835 
2836 /* Handle all other cases in UTF mode */
2837 
2838 #ifdef SUPPORT_UNICODE
2839       if (utf) switch(Lctype)
2840         {
2841         case OP_ANY:
2842         for (i = 1; i <= Lmin; i++)
2843           {
2844           if (Feptr >= mb->end_subject)
2845             {
2846             SCHECK_PARTIAL();
2847             RRETURN(MATCH_NOMATCH);
2848             }
2849           if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
2850           if (mb->partial != 0 &&
2851               Feptr + 1 >= mb->end_subject &&
2852               NLBLOCK->nltype == NLTYPE_FIXED &&
2853               NLBLOCK->nllen == 2 &&
2854               UCHAR21(Feptr) == NLBLOCK->nl[0])
2855             {
2856             mb->hitend = TRUE;
2857             if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
2858             }
2859           Feptr++;
2860           ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
2861           }
2862         break;
2863 
2864         case OP_ALLANY:
2865         for (i = 1; i <= Lmin; i++)
2866           {
2867           if (Feptr >= mb->end_subject)
2868             {
2869             SCHECK_PARTIAL();
2870             RRETURN(MATCH_NOMATCH);
2871             }
2872           Feptr++;
2873           ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
2874           }
2875         break;
2876 
2877         case OP_ANYBYTE:
2878         if (Feptr > mb->end_subject - Lmin) RRETURN(MATCH_NOMATCH);
2879         Feptr += Lmin;
2880         break;
2881 
2882         case OP_ANYNL:
2883         for (i = 1; i <= Lmin; i++)
2884           {
2885           if (Feptr >= mb->end_subject)
2886             {
2887             SCHECK_PARTIAL();
2888             RRETURN(MATCH_NOMATCH);
2889             }
2890           GETCHARINC(fc, Feptr);
2891           switch(fc)
2892             {
2893             default: RRETURN(MATCH_NOMATCH);
2894 
2895             case CHAR_CR:
2896             if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
2897             break;
2898 
2899             case CHAR_LF:
2900             break;
2901 
2902             case CHAR_VT:
2903             case CHAR_FF:
2904             case CHAR_NEL:
2905 #ifndef EBCDIC
2906             case 0x2028:
2907             case 0x2029:
2908 #endif  /* Not EBCDIC */
2909             if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
2910             break;
2911             }
2912           }
2913         break;
2914 
2915         case OP_NOT_HSPACE:
2916         for (i = 1; i <= Lmin; i++)
2917           {
2918           if (Feptr >= mb->end_subject)
2919             {
2920             SCHECK_PARTIAL();
2921             RRETURN(MATCH_NOMATCH);
2922             }
2923           GETCHARINC(fc, Feptr);
2924           switch(fc)
2925             {
2926             HSPACE_CASES: RRETURN(MATCH_NOMATCH);
2927             default: break;
2928             }
2929           }
2930         break;
2931 
2932         case OP_HSPACE:
2933         for (i = 1; i <= Lmin; i++)
2934           {
2935           if (Feptr >= mb->end_subject)
2936             {
2937             SCHECK_PARTIAL();
2938             RRETURN(MATCH_NOMATCH);
2939             }
2940           GETCHARINC(fc, Feptr);
2941           switch(fc)
2942             {
2943             HSPACE_CASES: break;
2944             default: RRETURN(MATCH_NOMATCH);
2945             }
2946           }
2947         break;
2948 
2949         case OP_NOT_VSPACE:
2950         for (i = 1; i <= Lmin; i++)
2951           {
2952           if (Feptr >= mb->end_subject)
2953             {
2954             SCHECK_PARTIAL();
2955             RRETURN(MATCH_NOMATCH);
2956             }
2957           GETCHARINC(fc, Feptr);
2958           switch(fc)
2959             {
2960             VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2961             default: break;
2962             }
2963           }
2964         break;
2965 
2966         case OP_VSPACE:
2967         for (i = 1; i <= Lmin; i++)
2968           {
2969           if (Feptr >= mb->end_subject)
2970             {
2971             SCHECK_PARTIAL();
2972             RRETURN(MATCH_NOMATCH);
2973             }
2974           GETCHARINC(fc, Feptr);
2975           switch(fc)
2976             {
2977             VSPACE_CASES: break;
2978             default: RRETURN(MATCH_NOMATCH);
2979             }
2980           }
2981         break;
2982 
2983         case OP_NOT_DIGIT:
2984         for (i = 1; i <= Lmin; i++)
2985           {
2986           if (Feptr >= mb->end_subject)
2987             {
2988             SCHECK_PARTIAL();
2989             RRETURN(MATCH_NOMATCH);
2990             }
2991           GETCHARINC(fc, Feptr);
2992           if (fc < 128 && (mb->ctypes[fc] & ctype_digit) != 0)
2993             RRETURN(MATCH_NOMATCH);
2994           }
2995         break;
2996 
2997         case OP_DIGIT:
2998         for (i = 1; i <= Lmin; i++)
2999           {
3000           uint32_t cc;
3001           if (Feptr >= mb->end_subject)
3002             {
3003             SCHECK_PARTIAL();
3004             RRETURN(MATCH_NOMATCH);
3005             }
3006           cc = UCHAR21(Feptr);
3007           if (cc >= 128 || (mb->ctypes[cc] & ctype_digit) == 0)
3008             RRETURN(MATCH_NOMATCH);
3009           Feptr++;
3010           /* No need to skip more code units - we know it has only one. */
3011           }
3012         break;
3013 
3014         case OP_NOT_WHITESPACE:
3015         for (i = 1; i <= Lmin; i++)
3016           {
3017           uint32_t cc;
3018           if (Feptr >= mb->end_subject)
3019             {
3020             SCHECK_PARTIAL();
3021             RRETURN(MATCH_NOMATCH);
3022             }
3023           cc = UCHAR21(Feptr);
3024           if (cc < 128 && (mb->ctypes[cc] & ctype_space) != 0)
3025             RRETURN(MATCH_NOMATCH);
3026           Feptr++;
3027           ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3028           }
3029         break;
3030 
3031         case OP_WHITESPACE:
3032         for (i = 1; i <= Lmin; i++)
3033           {
3034           uint32_t cc;
3035           if (Feptr >= mb->end_subject)
3036             {
3037             SCHECK_PARTIAL();
3038             RRETURN(MATCH_NOMATCH);
3039             }
3040           cc = UCHAR21(Feptr);
3041           if (cc >= 128 || (mb->ctypes[cc] & ctype_space) == 0)
3042             RRETURN(MATCH_NOMATCH);
3043           Feptr++;
3044           /* No need to skip more code units - we know it has only one. */
3045           }
3046         break;
3047 
3048         case OP_NOT_WORDCHAR:
3049         for (i = 1; i <= Lmin; i++)
3050           {
3051           uint32_t cc;
3052           if (Feptr >= mb->end_subject)
3053             {
3054             SCHECK_PARTIAL();
3055             RRETURN(MATCH_NOMATCH);
3056             }
3057           cc = UCHAR21(Feptr);
3058           if (cc < 128 && (mb->ctypes[cc] & ctype_word) != 0)
3059             RRETURN(MATCH_NOMATCH);
3060           Feptr++;
3061           ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3062           }
3063         break;
3064 
3065         case OP_WORDCHAR:
3066         for (i = 1; i <= Lmin; i++)
3067           {
3068           uint32_t cc;
3069           if (Feptr >= mb->end_subject)
3070             {
3071             SCHECK_PARTIAL();
3072             RRETURN(MATCH_NOMATCH);
3073             }
3074           cc = UCHAR21(Feptr);
3075           if (cc >= 128 || (mb->ctypes[cc] & ctype_word) == 0)
3076             RRETURN(MATCH_NOMATCH);
3077           Feptr++;
3078           /* No need to skip more code units - we know it has only one. */
3079           }
3080         break;
3081 
3082         default:
3083         return PCRE2_ERROR_INTERNAL;
3084         }  /* End switch(Lctype) */
3085 
3086       else
3087 #endif     /* SUPPORT_UNICODE */
3088 
3089       /* Code for the non-UTF case for minimum matching of operators other
3090       than OP_PROP and OP_NOTPROP. */
3091 
3092       switch(Lctype)
3093         {
3094         case OP_ANY:
3095         for (i = 1; i <= Lmin; i++)
3096           {
3097           if (Feptr >= mb->end_subject)
3098             {
3099             SCHECK_PARTIAL();
3100             RRETURN(MATCH_NOMATCH);
3101             }
3102           if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3103           if (mb->partial != 0 &&
3104               Feptr + 1 >= mb->end_subject &&
3105               NLBLOCK->nltype == NLTYPE_FIXED &&
3106               NLBLOCK->nllen == 2 &&
3107               *Feptr == NLBLOCK->nl[0])
3108             {
3109             mb->hitend = TRUE;
3110             if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3111             }
3112           Feptr++;
3113           }
3114         break;
3115 
3116         case OP_ALLANY:
3117         if (Feptr > mb->end_subject - Lmin)
3118           {
3119           SCHECK_PARTIAL();
3120           RRETURN(MATCH_NOMATCH);
3121           }
3122         Feptr += Lmin;
3123         break;
3124 
3125         /* This OP_ANYBYTE case will never be reached because \C gets turned
3126         into OP_ALLANY in non-UTF mode. Cut out the code so that coverage
3127         reports don't complain about its never being used. */
3128 
3129 /*        case OP_ANYBYTE:
3130 *        if (Feptr > mb->end_subject - Lmin)
3131 *          {
3132 *          SCHECK_PARTIAL();
3133 *          RRETURN(MATCH_NOMATCH);
3134 *          }
3135 *        Feptr += Lmin;
3136 *        break;
3137 */
3138         case OP_ANYNL:
3139         for (i = 1; i <= Lmin; i++)
3140           {
3141           if (Feptr >= mb->end_subject)
3142             {
3143             SCHECK_PARTIAL();
3144             RRETURN(MATCH_NOMATCH);
3145             }
3146           switch(*Feptr++)
3147             {
3148             default: RRETURN(MATCH_NOMATCH);
3149 
3150             case CHAR_CR:
3151             if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
3152             break;
3153 
3154             case CHAR_LF:
3155             break;
3156 
3157             case CHAR_VT:
3158             case CHAR_FF:
3159             case CHAR_NEL:
3160 #if PCRE2_CODE_UNIT_WIDTH != 8
3161             case 0x2028:
3162             case 0x2029:
3163 #endif
3164             if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
3165             break;
3166             }
3167           }
3168         break;
3169 
3170         case OP_NOT_HSPACE:
3171         for (i = 1; i <= Lmin; i++)
3172           {
3173           if (Feptr >= mb->end_subject)
3174             {
3175             SCHECK_PARTIAL();
3176             RRETURN(MATCH_NOMATCH);
3177             }
3178           switch(*Feptr++)
3179             {
3180             default: break;
3181             HSPACE_BYTE_CASES:
3182 #if PCRE2_CODE_UNIT_WIDTH != 8
3183             HSPACE_MULTIBYTE_CASES:
3184 #endif
3185             RRETURN(MATCH_NOMATCH);
3186             }
3187           }
3188         break;
3189 
3190         case OP_HSPACE:
3191         for (i = 1; i <= Lmin; i++)
3192           {
3193           if (Feptr >= mb->end_subject)
3194             {
3195             SCHECK_PARTIAL();
3196             RRETURN(MATCH_NOMATCH);
3197             }
3198           switch(*Feptr++)
3199             {
3200             default: RRETURN(MATCH_NOMATCH);
3201             HSPACE_BYTE_CASES:
3202 #if PCRE2_CODE_UNIT_WIDTH != 8
3203             HSPACE_MULTIBYTE_CASES:
3204 #endif
3205             break;
3206             }
3207           }
3208         break;
3209 
3210         case OP_NOT_VSPACE:
3211         for (i = 1; i <= Lmin; i++)
3212           {
3213           if (Feptr >= mb->end_subject)
3214             {
3215             SCHECK_PARTIAL();
3216             RRETURN(MATCH_NOMATCH);
3217             }
3218           switch(*Feptr++)
3219             {
3220             VSPACE_BYTE_CASES:
3221 #if PCRE2_CODE_UNIT_WIDTH != 8
3222             VSPACE_MULTIBYTE_CASES:
3223 #endif
3224             RRETURN(MATCH_NOMATCH);
3225             default: break;
3226             }
3227           }
3228         break;
3229 
3230         case OP_VSPACE:
3231         for (i = 1; i <= Lmin; i++)
3232           {
3233           if (Feptr >= mb->end_subject)
3234             {
3235             SCHECK_PARTIAL();
3236             RRETURN(MATCH_NOMATCH);
3237             }
3238           switch(*Feptr++)
3239             {
3240             default: RRETURN(MATCH_NOMATCH);
3241             VSPACE_BYTE_CASES:
3242 #if PCRE2_CODE_UNIT_WIDTH != 8
3243             VSPACE_MULTIBYTE_CASES:
3244 #endif
3245             break;
3246             }
3247           }
3248         break;
3249 
3250         case OP_NOT_DIGIT:
3251         for (i = 1; i <= Lmin; i++)
3252           {
3253           if (Feptr >= mb->end_subject)
3254             {
3255             SCHECK_PARTIAL();
3256             RRETURN(MATCH_NOMATCH);
3257             }
3258           if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
3259             RRETURN(MATCH_NOMATCH);
3260           Feptr++;
3261           }
3262         break;
3263 
3264         case OP_DIGIT:
3265         for (i = 1; i <= Lmin; i++)
3266           {
3267           if (Feptr >= mb->end_subject)
3268             {
3269             SCHECK_PARTIAL();
3270             RRETURN(MATCH_NOMATCH);
3271             }
3272           if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
3273             RRETURN(MATCH_NOMATCH);
3274           Feptr++;
3275           }
3276         break;
3277 
3278         case OP_NOT_WHITESPACE:
3279         for (i = 1; i <= Lmin; i++)
3280           {
3281           if (Feptr >= mb->end_subject)
3282             {
3283             SCHECK_PARTIAL();
3284             RRETURN(MATCH_NOMATCH);
3285             }
3286           if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
3287             RRETURN(MATCH_NOMATCH);
3288           Feptr++;
3289           }
3290         break;
3291 
3292         case OP_WHITESPACE:
3293         for (i = 1; i <= Lmin; i++)
3294           {
3295           if (Feptr >= mb->end_subject)
3296             {
3297             SCHECK_PARTIAL();
3298             RRETURN(MATCH_NOMATCH);
3299             }
3300           if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
3301             RRETURN(MATCH_NOMATCH);
3302           Feptr++;
3303           }
3304         break;
3305 
3306         case OP_NOT_WORDCHAR:
3307         for (i = 1; i <= Lmin; i++)
3308           {
3309           if (Feptr >= mb->end_subject)
3310             {
3311             SCHECK_PARTIAL();
3312             RRETURN(MATCH_NOMATCH);
3313             }
3314           if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
3315             RRETURN(MATCH_NOMATCH);
3316           Feptr++;
3317           }
3318         break;
3319 
3320         case OP_WORDCHAR:
3321         for (i = 1; i <= Lmin; i++)
3322           {
3323           if (Feptr >= mb->end_subject)
3324             {
3325             SCHECK_PARTIAL();
3326             RRETURN(MATCH_NOMATCH);
3327             }
3328           if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
3329             RRETURN(MATCH_NOMATCH);
3330           Feptr++;
3331           }
3332         break;
3333 
3334         default:
3335         return PCRE2_ERROR_INTERNAL;
3336         }
3337       }
3338 
3339     /* If Lmin = Lmax we are done. Continue with the main loop. */
3340 
3341     if (Lmin == Lmax) continue;
3342 
3343     /* If minimizing, we have to test the rest of the pattern before each
3344     subsequent match. */
3345 
3346     if (reptype == REPTYPE_MIN)
3347       {
3348 #ifdef SUPPORT_UNICODE
3349       if (proptype >= 0)
3350         {
3351         switch(proptype)
3352           {
3353           case PT_ANY:
3354           for (;;)
3355             {
3356             RMATCH(Fecode, RM208);
3357             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3358             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3359             if (Feptr >= mb->end_subject)
3360               {
3361               SCHECK_PARTIAL();
3362               RRETURN(MATCH_NOMATCH);
3363               }
3364             GETCHARINCTEST(fc, Feptr);
3365             if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3366             }
3367           /* Control never gets here */
3368 
3369           case PT_LAMP:
3370           for (;;)
3371             {
3372             int chartype;
3373             RMATCH(Fecode, RM209);
3374             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3375             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3376             if (Feptr >= mb->end_subject)
3377               {
3378               SCHECK_PARTIAL();
3379               RRETURN(MATCH_NOMATCH);
3380               }
3381             GETCHARINCTEST(fc, Feptr);
3382             chartype = UCD_CHARTYPE(fc);
3383             if ((chartype == ucp_Lu ||
3384                  chartype == ucp_Ll ||
3385                  chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
3386               RRETURN(MATCH_NOMATCH);
3387             }
3388           /* Control never gets here */
3389 
3390           case PT_GC:
3391           for (;;)
3392             {
3393             RMATCH(Fecode, RM210);
3394             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3395             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3396             if (Feptr >= mb->end_subject)
3397               {
3398               SCHECK_PARTIAL();
3399               RRETURN(MATCH_NOMATCH);
3400               }
3401             GETCHARINCTEST(fc, Feptr);
3402             if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3403               RRETURN(MATCH_NOMATCH);
3404             }
3405           /* Control never gets here */
3406 
3407           case PT_PC:
3408           for (;;)
3409             {
3410             RMATCH(Fecode, RM211);
3411             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3412             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3413             if (Feptr >= mb->end_subject)
3414               {
3415               SCHECK_PARTIAL();
3416               RRETURN(MATCH_NOMATCH);
3417               }
3418             GETCHARINCTEST(fc, Feptr);
3419             if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3420               RRETURN(MATCH_NOMATCH);
3421             }
3422           /* Control never gets here */
3423 
3424           case PT_SC:
3425           for (;;)
3426             {
3427             RMATCH(Fecode, RM212);
3428             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3429             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3430             if (Feptr >= mb->end_subject)
3431               {
3432               SCHECK_PARTIAL();
3433               RRETURN(MATCH_NOMATCH);
3434               }
3435             GETCHARINCTEST(fc, Feptr);
3436             if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3437               RRETURN(MATCH_NOMATCH);
3438             }
3439           /* Control never gets here */
3440 
3441           case PT_ALNUM:
3442           for (;;)
3443             {
3444             int category;
3445             RMATCH(Fecode, RM213);
3446             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3447             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3448             if (Feptr >= mb->end_subject)
3449               {
3450               SCHECK_PARTIAL();
3451               RRETURN(MATCH_NOMATCH);
3452               }
3453             GETCHARINCTEST(fc, Feptr);
3454             category = UCD_CATEGORY(fc);
3455             if ((category == ucp_L || category == ucp_N) ==
3456                 (Lctype == OP_NOTPROP))
3457               RRETURN(MATCH_NOMATCH);
3458             }
3459           /* Control never gets here */
3460 
3461           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
3462           which means that Perl space and POSIX space are now identical. PCRE
3463           was changed at release 8.34. */
3464 
3465           case PT_SPACE:    /* Perl space */
3466           case PT_PXSPACE:  /* POSIX space */
3467           for (;;)
3468             {
3469             RMATCH(Fecode, RM214);
3470             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3471             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3472             if (Feptr >= mb->end_subject)
3473               {
3474               SCHECK_PARTIAL();
3475               RRETURN(MATCH_NOMATCH);
3476               }
3477             GETCHARINCTEST(fc, Feptr);
3478             switch(fc)
3479               {
3480               HSPACE_CASES:
3481               VSPACE_CASES:
3482               if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3483               break;
3484 
3485               default:
3486               if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
3487                 RRETURN(MATCH_NOMATCH);
3488               break;
3489               }
3490             }
3491           /* Control never gets here */
3492 
3493           case PT_WORD:
3494           for (;;)
3495             {
3496             int category;
3497             RMATCH(Fecode, RM215);
3498             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3499             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3500             if (Feptr >= mb->end_subject)
3501               {
3502               SCHECK_PARTIAL();
3503               RRETURN(MATCH_NOMATCH);
3504               }
3505             GETCHARINCTEST(fc, Feptr);
3506             category = UCD_CATEGORY(fc);
3507             if ((category == ucp_L ||
3508                  category == ucp_N ||
3509                  fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
3510               RRETURN(MATCH_NOMATCH);
3511             }
3512           /* Control never gets here */
3513 
3514           case PT_CLIST:
3515           for (;;)
3516             {
3517             const uint32_t *cp;
3518             RMATCH(Fecode, RM216);
3519             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3520             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3521             if (Feptr >= mb->end_subject)
3522               {
3523               SCHECK_PARTIAL();
3524               RRETURN(MATCH_NOMATCH);
3525               }
3526             GETCHARINCTEST(fc, Feptr);
3527             cp = PRIV(ucd_caseless_sets) + Lpropvalue;
3528             for (;;)
3529               {
3530               if (fc < *cp)
3531                 {
3532                 if (Lctype == OP_NOTPROP) break;
3533                 RRETURN(MATCH_NOMATCH);
3534                 }
3535               if (fc == *cp++)
3536                 {
3537                 if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3538                 break;
3539                 }
3540               }
3541             }
3542           /* Control never gets here */
3543 
3544           case PT_UCNC:
3545           for (;;)
3546             {
3547             RMATCH(Fecode, RM217);
3548             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3549             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3550             if (Feptr >= mb->end_subject)
3551               {
3552               SCHECK_PARTIAL();
3553               RRETURN(MATCH_NOMATCH);
3554               }
3555             GETCHARINCTEST(fc, Feptr);
3556             if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
3557                  fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
3558                  fc >= 0xe000) == (Lctype == OP_NOTPROP))
3559               RRETURN(MATCH_NOMATCH);
3560             }
3561           /* Control never gets here */
3562 
3563           /* This should never occur */
3564           default:
3565           return PCRE2_ERROR_INTERNAL;
3566           }
3567         }
3568 
3569       /* Match extended Unicode sequences. We will get here only if the
3570       support is in the binary; otherwise a compile-time error occurs. */
3571 
3572       else if (Lctype == OP_EXTUNI)
3573         {
3574         for (;;)
3575           {
3576           RMATCH(Fecode, RM218);
3577           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3578           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3579           if (Feptr >= mb->end_subject)
3580             {
3581             SCHECK_PARTIAL();
3582             RRETURN(MATCH_NOMATCH);
3583             }
3584           else
3585             {
3586             GETCHARINCTEST(fc, Feptr);
3587             Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
3588               utf, NULL);
3589             }
3590           CHECK_PARTIAL();
3591           }
3592         }
3593       else
3594 #endif     /* SUPPORT_UNICODE */
3595 
3596       /* UTF mode for non-property testing character types. */
3597 
3598 #ifdef SUPPORT_UNICODE
3599       if (utf)
3600         {
3601         for (;;)
3602           {
3603           RMATCH(Fecode, RM219);
3604           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3605           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3606           if (Feptr >= mb->end_subject)
3607             {
3608             SCHECK_PARTIAL();
3609             RRETURN(MATCH_NOMATCH);
3610             }
3611           if (Lctype == OP_ANY && IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3612           GETCHARINC(fc, Feptr);
3613           switch(Lctype)
3614             {
3615             case OP_ANY:               /* This is the non-NL case */
3616             if (mb->partial != 0 &&    /* Take care with CRLF partial */
3617                 Feptr >= mb->end_subject &&
3618                 NLBLOCK->nltype == NLTYPE_FIXED &&
3619                 NLBLOCK->nllen == 2 &&
3620                 fc == NLBLOCK->nl[0])
3621               {
3622               mb->hitend = TRUE;
3623               if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3624               }
3625             break;
3626 
3627             case OP_ALLANY:
3628             case OP_ANYBYTE:
3629             break;
3630 
3631             case OP_ANYNL:
3632             switch(fc)
3633               {
3634               default: RRETURN(MATCH_NOMATCH);
3635 
3636               case CHAR_CR:
3637               if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
3638               break;
3639 
3640               case CHAR_LF:
3641               break;
3642 
3643               case CHAR_VT:
3644               case CHAR_FF:
3645               case CHAR_NEL:
3646 #ifndef EBCDIC
3647               case 0x2028:
3648               case 0x2029:
3649 #endif  /* Not EBCDIC */
3650               if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
3651                 RRETURN(MATCH_NOMATCH);
3652               break;
3653               }
3654             break;
3655 
3656             case OP_NOT_HSPACE:
3657             switch(fc)
3658               {
3659               HSPACE_CASES: RRETURN(MATCH_NOMATCH);
3660               default: break;
3661               }
3662             break;
3663 
3664             case OP_HSPACE:
3665             switch(fc)
3666               {
3667               HSPACE_CASES: break;
3668               default: RRETURN(MATCH_NOMATCH);
3669               }
3670             break;
3671 
3672             case OP_NOT_VSPACE:
3673             switch(fc)
3674               {
3675               VSPACE_CASES: RRETURN(MATCH_NOMATCH);
3676               default: break;
3677               }
3678             break;
3679 
3680             case OP_VSPACE:
3681             switch(fc)
3682               {
3683               VSPACE_CASES: break;
3684               default: RRETURN(MATCH_NOMATCH);
3685               }
3686             break;
3687 
3688             case OP_NOT_DIGIT:
3689             if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0)
3690               RRETURN(MATCH_NOMATCH);
3691             break;
3692 
3693             case OP_DIGIT:
3694             if (fc >= 256 || (mb->ctypes[fc] & ctype_digit) == 0)
3695               RRETURN(MATCH_NOMATCH);
3696             break;
3697 
3698             case OP_NOT_WHITESPACE:
3699             if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0)
3700               RRETURN(MATCH_NOMATCH);
3701             break;
3702 
3703             case OP_WHITESPACE:
3704             if (fc >= 256 || (mb->ctypes[fc] & ctype_space) == 0)
3705               RRETURN(MATCH_NOMATCH);
3706             break;
3707 
3708             case OP_NOT_WORDCHAR:
3709             if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0)
3710               RRETURN(MATCH_NOMATCH);
3711             break;
3712 
3713             case OP_WORDCHAR:
3714             if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0)
3715               RRETURN(MATCH_NOMATCH);
3716             break;
3717 
3718             default:
3719             return PCRE2_ERROR_INTERNAL;
3720             }
3721           }
3722         }
3723       else
3724 #endif  /* SUPPORT_UNICODE */
3725 
3726       /* Not UTF mode */
3727         {
3728         for (;;)
3729           {
3730           RMATCH(Fecode, RM33);
3731           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3732           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3733           if (Feptr >= mb->end_subject)
3734             {
3735             SCHECK_PARTIAL();
3736             RRETURN(MATCH_NOMATCH);
3737             }
3738           if (Lctype == OP_ANY && IS_NEWLINE(Feptr))
3739             RRETURN(MATCH_NOMATCH);
3740           fc = *Feptr++;
3741           switch(Lctype)
3742             {
3743             case OP_ANY:               /* This is the non-NL case */
3744             if (mb->partial != 0 &&    /* Take care with CRLF partial */
3745                 Feptr >= mb->end_subject &&
3746                 NLBLOCK->nltype == NLTYPE_FIXED &&
3747                 NLBLOCK->nllen == 2 &&
3748                 fc == NLBLOCK->nl[0])
3749               {
3750               mb->hitend = TRUE;
3751               if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3752               }
3753             break;
3754 
3755             case OP_ALLANY:
3756             case OP_ANYBYTE:
3757             break;
3758 
3759             case OP_ANYNL:
3760             switch(fc)
3761               {
3762               default: RRETURN(MATCH_NOMATCH);
3763 
3764               case CHAR_CR:
3765               if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
3766               break;
3767 
3768               case CHAR_LF:
3769               break;
3770 
3771               case CHAR_VT:
3772               case CHAR_FF:
3773               case CHAR_NEL:
3774 #if PCRE2_CODE_UNIT_WIDTH != 8
3775               case 0x2028:
3776               case 0x2029:
3777 #endif
3778               if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
3779                 RRETURN(MATCH_NOMATCH);
3780               break;
3781               }
3782             break;
3783 
3784             case OP_NOT_HSPACE:
3785             switch(fc)
3786               {
3787               default: break;
3788               HSPACE_BYTE_CASES:
3789 #if PCRE2_CODE_UNIT_WIDTH != 8
3790               HSPACE_MULTIBYTE_CASES:
3791 #endif
3792               RRETURN(MATCH_NOMATCH);
3793               }
3794             break;
3795 
3796             case OP_HSPACE:
3797             switch(fc)
3798               {
3799               default: RRETURN(MATCH_NOMATCH);
3800               HSPACE_BYTE_CASES:
3801 #if PCRE2_CODE_UNIT_WIDTH != 8
3802               HSPACE_MULTIBYTE_CASES:
3803 #endif
3804               break;
3805               }
3806             break;
3807 
3808             case OP_NOT_VSPACE:
3809             switch(fc)
3810               {
3811               default: break;
3812               VSPACE_BYTE_CASES:
3813 #if PCRE2_CODE_UNIT_WIDTH != 8
3814               VSPACE_MULTIBYTE_CASES:
3815 #endif
3816               RRETURN(MATCH_NOMATCH);
3817               }
3818             break;
3819 
3820             case OP_VSPACE:
3821             switch(fc)
3822               {
3823               default: RRETURN(MATCH_NOMATCH);
3824               VSPACE_BYTE_CASES:
3825 #if PCRE2_CODE_UNIT_WIDTH != 8
3826               VSPACE_MULTIBYTE_CASES:
3827 #endif
3828               break;
3829               }
3830             break;
3831 
3832             case OP_NOT_DIGIT:
3833             if (MAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
3834               RRETURN(MATCH_NOMATCH);
3835             break;
3836 
3837             case OP_DIGIT:
3838             if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
3839               RRETURN(MATCH_NOMATCH);
3840             break;
3841 
3842             case OP_NOT_WHITESPACE:
3843             if (MAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
3844               RRETURN(MATCH_NOMATCH);
3845             break;
3846 
3847             case OP_WHITESPACE:
3848             if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
3849               RRETURN(MATCH_NOMATCH);
3850             break;
3851 
3852             case OP_NOT_WORDCHAR:
3853             if (MAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
3854               RRETURN(MATCH_NOMATCH);
3855             break;
3856 
3857             case OP_WORDCHAR:
3858             if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
3859               RRETURN(MATCH_NOMATCH);
3860             break;
3861 
3862             default:
3863             return PCRE2_ERROR_INTERNAL;
3864             }
3865           }
3866         }
3867       /* Control never gets here */
3868       }
3869 
3870     /* If maximizing, it is worth using inline code for speed, doing the type
3871     test once at the start (i.e. keep it out of the loop). */
3872 
3873     else
3874       {
3875       Lstart_eptr = Feptr;  /* Remember where we started */
3876 
3877 #ifdef SUPPORT_UNICODE
3878       if (proptype >= 0)
3879         {
3880         switch(proptype)
3881           {
3882           case PT_ANY:
3883           for (i = Lmin; i < Lmax; i++)
3884             {
3885             int len = 1;
3886             if (Feptr >= mb->end_subject)
3887               {
3888               SCHECK_PARTIAL();
3889               break;
3890               }
3891             GETCHARLENTEST(fc, Feptr, len);
3892             if (Lctype == OP_NOTPROP) break;
3893             Feptr+= len;
3894             }
3895           break;
3896 
3897           case PT_LAMP:
3898           for (i = Lmin; i < Lmax; i++)
3899             {
3900             int chartype;
3901             int len = 1;
3902             if (Feptr >= mb->end_subject)
3903               {
3904               SCHECK_PARTIAL();
3905               break;
3906               }
3907             GETCHARLENTEST(fc, Feptr, len);
3908             chartype = UCD_CHARTYPE(fc);
3909             if ((chartype == ucp_Lu ||
3910                  chartype == ucp_Ll ||
3911                  chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
3912               break;
3913             Feptr+= len;
3914             }
3915           break;
3916 
3917           case PT_GC:
3918           for (i = Lmin; i < Lmax; i++)
3919             {
3920             int len = 1;
3921             if (Feptr >= mb->end_subject)
3922               {
3923               SCHECK_PARTIAL();
3924               break;
3925               }
3926             GETCHARLENTEST(fc, Feptr, len);
3927             if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3928               break;
3929             Feptr+= len;
3930             }
3931           break;
3932 
3933           case PT_PC:
3934           for (i = Lmin; i < Lmax; i++)
3935             {
3936             int len = 1;
3937             if (Feptr >= mb->end_subject)
3938               {
3939               SCHECK_PARTIAL();
3940               break;
3941               }
3942             GETCHARLENTEST(fc, Feptr, len);
3943             if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3944               break;
3945             Feptr+= len;
3946             }
3947           break;
3948 
3949           case PT_SC:
3950           for (i = Lmin; i < Lmax; i++)
3951             {
3952             int len = 1;
3953             if (Feptr >= mb->end_subject)
3954               {
3955               SCHECK_PARTIAL();
3956               break;
3957               }
3958             GETCHARLENTEST(fc, Feptr, len);
3959             if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3960               break;
3961             Feptr+= len;
3962             }
3963           break;
3964 
3965           case PT_ALNUM:
3966           for (i = Lmin; i < Lmax; i++)
3967             {
3968             int category;
3969             int len = 1;
3970             if (Feptr >= mb->end_subject)
3971               {
3972               SCHECK_PARTIAL();
3973               break;
3974               }
3975             GETCHARLENTEST(fc, Feptr, len);
3976             category = UCD_CATEGORY(fc);
3977             if ((category == ucp_L || category == ucp_N) ==
3978                 (Lctype == OP_NOTPROP))
3979               break;
3980             Feptr+= len;
3981             }
3982           break;
3983 
3984           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
3985           which means that Perl space and POSIX space are now identical. PCRE
3986           was changed at release 8.34. */
3987 
3988           case PT_SPACE:    /* Perl space */
3989           case PT_PXSPACE:  /* POSIX space */
3990           for (i = Lmin; i < Lmax; i++)
3991             {
3992             int len = 1;
3993             if (Feptr >= mb->end_subject)
3994               {
3995               SCHECK_PARTIAL();
3996               break;
3997               }
3998             GETCHARLENTEST(fc, Feptr, len);
3999             switch(fc)
4000               {
4001               HSPACE_CASES:
4002               VSPACE_CASES:
4003               if (Lctype == OP_NOTPROP) goto ENDLOOP99;  /* Break the loop */
4004               break;
4005 
4006               default:
4007               if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
4008                 goto ENDLOOP99;   /* Break the loop */
4009               break;
4010               }
4011             Feptr+= len;
4012             }
4013           ENDLOOP99:
4014           break;
4015 
4016           case PT_WORD:
4017           for (i = Lmin; i < Lmax; i++)
4018             {
4019             int category;
4020             int len = 1;
4021             if (Feptr >= mb->end_subject)
4022               {
4023               SCHECK_PARTIAL();
4024               break;
4025               }
4026             GETCHARLENTEST(fc, Feptr, len);
4027             category = UCD_CATEGORY(fc);
4028             if ((category == ucp_L || category == ucp_N ||
4029                  fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
4030               break;
4031             Feptr+= len;
4032             }
4033           break;
4034 
4035           case PT_CLIST:
4036           for (i = Lmin; i < Lmax; i++)
4037             {
4038             const uint32_t *cp;
4039             int len = 1;
4040             if (Feptr >= mb->end_subject)
4041               {
4042               SCHECK_PARTIAL();
4043               break;
4044               }
4045             GETCHARLENTEST(fc, Feptr, len);
4046             cp = PRIV(ucd_caseless_sets) + Lpropvalue;
4047             for (;;)
4048               {
4049               if (fc < *cp)
4050                 { if (Lctype == OP_NOTPROP) break; else goto GOT_MAX; }
4051               if (fc == *cp++)
4052                 { if (Lctype == OP_NOTPROP) goto GOT_MAX; else break; }
4053               }
4054             Feptr += len;
4055             }
4056           GOT_MAX:
4057           break;
4058 
4059           case PT_UCNC:
4060           for (i = Lmin; i < Lmax; i++)
4061             {
4062             int len = 1;
4063             if (Feptr >= mb->end_subject)
4064               {
4065               SCHECK_PARTIAL();
4066               break;
4067               }
4068             GETCHARLENTEST(fc, Feptr, len);
4069             if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
4070                  fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
4071                  fc >= 0xe000) == (Lctype == OP_NOTPROP))
4072               break;
4073             Feptr += len;
4074             }
4075           break;
4076 
4077           default:
4078           return PCRE2_ERROR_INTERNAL;
4079           }
4080 
4081         /* Feptr is now past the end of the maximum run */
4082 
4083         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4084 
4085         /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4086         Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
4087         go too far. */
4088 
4089         for(;;)
4090           {
4091           if (Feptr <= Lstart_eptr) break;
4092           RMATCH(Fecode, RM222);
4093           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4094           Feptr--;
4095           if (utf) BACKCHAR(Feptr);
4096           }
4097         }
4098 
4099       /* Match extended Unicode grapheme clusters. We will get here only if the
4100       support is in the binary; otherwise a compile-time error occurs. */
4101 
4102       else if (Lctype == OP_EXTUNI)
4103         {
4104         for (i = Lmin; i < Lmax; i++)
4105           {
4106           if (Feptr >= mb->end_subject)
4107             {
4108             SCHECK_PARTIAL();
4109             break;
4110             }
4111           else
4112             {
4113             GETCHARINCTEST(fc, Feptr);
4114             Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
4115               utf, NULL);
4116             }
4117           CHECK_PARTIAL();
4118           }
4119 
4120         /* Feptr is now past the end of the maximum run */
4121 
4122         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4123 
4124         /* We use <= Lstart_eptr rather than == Lstart_eptr to detect the start
4125         of the run while backtracking because the use of \C in UTF mode can
4126         cause BACKCHAR to move back past Lstart_eptr. This is just palliative;
4127         the use of \C in UTF mode is fraught with danger. */
4128 
4129         for(;;)
4130           {
4131           int lgb, rgb;
4132           PCRE2_SPTR fptr;
4133 
4134           if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4135           RMATCH(Fecode, RM220);
4136           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4137 
4138           /* Backtracking over an extended grapheme cluster involves inspecting
4139           the previous two characters (if present) to see if a break is
4140           permitted between them. */
4141 
4142           Feptr--;
4143           if (!utf) fc = *Feptr; else
4144             {
4145             BACKCHAR(Feptr);
4146             GETCHAR(fc, Feptr);
4147             }
4148           rgb = UCD_GRAPHBREAK(fc);
4149 
4150           for (;;)
4151             {
4152             if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4153             fptr = Feptr - 1;
4154             if (!utf) fc = *fptr; else
4155               {
4156               BACKCHAR(fptr);
4157               GETCHAR(fc, fptr);
4158               }
4159             lgb = UCD_GRAPHBREAK(fc);
4160             if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
4161             Feptr = fptr;
4162             rgb = lgb;
4163             }
4164           }
4165         }
4166 
4167       else
4168 #endif   /* SUPPORT_UNICODE */
4169 
4170 #ifdef SUPPORT_UNICODE
4171       if (utf)
4172         {
4173         switch(Lctype)
4174           {
4175           case OP_ANY:
4176           for (i = Lmin; i < Lmax; i++)
4177             {
4178             if (Feptr >= mb->end_subject)
4179               {
4180               SCHECK_PARTIAL();
4181               break;
4182               }
4183             if (IS_NEWLINE(Feptr)) break;
4184             if (mb->partial != 0 &&    /* Take care with CRLF partial */
4185                 Feptr + 1 >= mb->end_subject &&
4186                 NLBLOCK->nltype == NLTYPE_FIXED &&
4187                 NLBLOCK->nllen == 2 &&
4188                 UCHAR21(Feptr) == NLBLOCK->nl[0])
4189               {
4190               mb->hitend = TRUE;
4191               if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4192               }
4193             Feptr++;
4194             ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4195             }
4196           break;
4197 
4198           case OP_ALLANY:
4199           if (Lmax < UINT32_MAX)
4200             {
4201             for (i = Lmin; i < Lmax; i++)
4202               {
4203               if (Feptr >= mb->end_subject)
4204                 {
4205                 SCHECK_PARTIAL();
4206                 break;
4207                 }
4208               Feptr++;
4209               ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4210               }
4211             }
4212           else
4213             {
4214             Feptr = mb->end_subject;   /* Unlimited UTF-8 repeat */
4215             SCHECK_PARTIAL();
4216             }
4217           break;
4218 
4219           /* The "byte" (i.e. "code unit") case is the same as non-UTF */
4220 
4221           case OP_ANYBYTE:
4222           fc = Lmax - Lmin;
4223           if (fc > (uint32_t)(mb->end_subject - Feptr))
4224             {
4225             Feptr = mb->end_subject;
4226             SCHECK_PARTIAL();
4227             }
4228           else Feptr += fc;
4229           break;
4230 
4231           case OP_ANYNL:
4232           for (i = Lmin; i < Lmax; i++)
4233             {
4234             int len = 1;
4235             if (Feptr >= mb->end_subject)
4236               {
4237               SCHECK_PARTIAL();
4238               break;
4239               }
4240             GETCHARLEN(fc, Feptr, len);
4241             if (fc == CHAR_CR)
4242               {
4243               if (++Feptr >= mb->end_subject) break;
4244               if (UCHAR21(Feptr) == CHAR_LF) Feptr++;
4245               }
4246             else
4247               {
4248               if (fc != CHAR_LF &&
4249                   (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
4250                    (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
4251 #ifndef EBCDIC
4252                     && fc != 0x2028 && fc != 0x2029
4253 #endif  /* Not EBCDIC */
4254                     )))
4255                 break;
4256               Feptr += len;
4257               }
4258             }
4259           break;
4260 
4261           case OP_NOT_HSPACE:
4262           case OP_HSPACE:
4263           for (i = Lmin; i < Lmax; i++)
4264             {
4265             BOOL gotspace;
4266             int len = 1;
4267             if (Feptr >= mb->end_subject)
4268               {
4269               SCHECK_PARTIAL();
4270               break;
4271               }
4272             GETCHARLEN(fc, Feptr, len);
4273             switch(fc)
4274               {
4275               HSPACE_CASES: gotspace = TRUE; break;
4276               default: gotspace = FALSE; break;
4277               }
4278             if (gotspace == (Lctype == OP_NOT_HSPACE)) break;
4279             Feptr += len;
4280             }
4281           break;
4282 
4283           case OP_NOT_VSPACE:
4284           case OP_VSPACE:
4285           for (i = Lmin; i < Lmax; i++)
4286             {
4287             BOOL gotspace;
4288             int len = 1;
4289             if (Feptr >= mb->end_subject)
4290               {
4291               SCHECK_PARTIAL();
4292               break;
4293               }
4294             GETCHARLEN(fc, Feptr, len);
4295             switch(fc)
4296               {
4297               VSPACE_CASES: gotspace = TRUE; break;
4298               default: gotspace = FALSE; break;
4299               }
4300             if (gotspace == (Lctype == OP_NOT_VSPACE)) break;
4301             Feptr += len;
4302             }
4303           break;
4304 
4305           case OP_NOT_DIGIT:
4306           for (i = Lmin; i < Lmax; i++)
4307             {
4308             int len = 1;
4309             if (Feptr >= mb->end_subject)
4310               {
4311               SCHECK_PARTIAL();
4312               break;
4313               }
4314             GETCHARLEN(fc, Feptr, len);
4315             if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0) break;
4316             Feptr+= len;
4317             }
4318           break;
4319 
4320           case OP_DIGIT:
4321           for (i = Lmin; i < Lmax; i++)
4322             {
4323             int len = 1;
4324             if (Feptr >= mb->end_subject)
4325               {
4326               SCHECK_PARTIAL();
4327               break;
4328               }
4329             GETCHARLEN(fc, Feptr, len);
4330             if (fc >= 256 ||(mb->ctypes[fc] & ctype_digit) == 0) break;
4331             Feptr+= len;
4332             }
4333           break;
4334 
4335           case OP_NOT_WHITESPACE:
4336           for (i = Lmin; i < Lmax; i++)
4337             {
4338             int len = 1;
4339             if (Feptr >= mb->end_subject)
4340               {
4341               SCHECK_PARTIAL();
4342               break;
4343               }
4344             GETCHARLEN(fc, Feptr, len);
4345             if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0) break;
4346             Feptr+= len;
4347             }
4348           break;
4349 
4350           case OP_WHITESPACE:
4351           for (i = Lmin; i < Lmax; i++)
4352             {
4353             int len = 1;
4354             if (Feptr >= mb->end_subject)
4355               {
4356               SCHECK_PARTIAL();
4357               break;
4358               }
4359             GETCHARLEN(fc, Feptr, len);
4360             if (fc >= 256 ||(mb->ctypes[fc] & ctype_space) == 0) break;
4361             Feptr+= len;
4362             }
4363           break;
4364 
4365           case OP_NOT_WORDCHAR:
4366           for (i = Lmin; i < Lmax; i++)
4367             {
4368             int len = 1;
4369             if (Feptr >= mb->end_subject)
4370               {
4371               SCHECK_PARTIAL();
4372               break;
4373               }
4374             GETCHARLEN(fc, Feptr, len);
4375             if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0) break;
4376             Feptr+= len;
4377             }
4378           break;
4379 
4380           case OP_WORDCHAR:
4381           for (i = Lmin; i < Lmax; i++)
4382             {
4383             int len = 1;
4384             if (Feptr >= mb->end_subject)
4385               {
4386               SCHECK_PARTIAL();
4387               break;
4388               }
4389             GETCHARLEN(fc, Feptr, len);
4390             if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0) break;
4391             Feptr+= len;
4392             }
4393           break;
4394 
4395           default:
4396           return PCRE2_ERROR_INTERNAL;
4397           }
4398 
4399         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4400 
4401         /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4402         Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't go
4403         too far. */
4404 
4405         for(;;)
4406           {
4407           if (Feptr <= Lstart_eptr) break;
4408           RMATCH(Fecode, RM221);
4409           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4410           Feptr--;
4411           BACKCHAR(Feptr);
4412           if (Lctype == OP_ANYNL && Feptr > Lstart_eptr &&
4413               UCHAR21(Feptr) == CHAR_NL && UCHAR21(Feptr - 1) == CHAR_CR)
4414             Feptr--;
4415           }
4416         }
4417       else
4418 #endif  /* SUPPORT_UNICODE */
4419 
4420       /* Not UTF mode */
4421         {
4422         switch(Lctype)
4423           {
4424           case OP_ANY:
4425           for (i = Lmin; i < Lmax; i++)
4426             {
4427             if (Feptr >= mb->end_subject)
4428               {
4429               SCHECK_PARTIAL();
4430               break;
4431               }
4432             if (IS_NEWLINE(Feptr)) break;
4433             if (mb->partial != 0 &&    /* Take care with CRLF partial */
4434                 Feptr + 1 >= mb->end_subject &&
4435                 NLBLOCK->nltype == NLTYPE_FIXED &&
4436                 NLBLOCK->nllen == 2 &&
4437                 *Feptr == NLBLOCK->nl[0])
4438               {
4439               mb->hitend = TRUE;
4440               if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4441               }
4442             Feptr++;
4443             }
4444           break;
4445 
4446           case OP_ALLANY:
4447           case OP_ANYBYTE:
4448           fc = Lmax - Lmin;
4449           if (fc > (uint32_t)(mb->end_subject - Feptr))
4450             {
4451             Feptr = mb->end_subject;
4452             SCHECK_PARTIAL();
4453             }
4454           else Feptr += fc;
4455           break;
4456 
4457           case OP_ANYNL:
4458           for (i = Lmin; i < Lmax; i++)
4459             {
4460             if (Feptr >= mb->end_subject)
4461               {
4462               SCHECK_PARTIAL();
4463               break;
4464               }
4465             fc = *Feptr;
4466             if (fc == CHAR_CR)
4467               {
4468               if (++Feptr >= mb->end_subject) break;
4469               if (*Feptr == CHAR_LF) Feptr++;
4470               }
4471             else
4472               {
4473               if (fc != CHAR_LF && (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
4474                  (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
4475 #if PCRE2_CODE_UNIT_WIDTH != 8
4476                  && fc != 0x2028 && fc != 0x2029
4477 #endif
4478                  ))) break;
4479               Feptr++;
4480               }
4481             }
4482           break;
4483 
4484           case OP_NOT_HSPACE:
4485           for (i = Lmin; i < Lmax; i++)
4486             {
4487             if (Feptr >= mb->end_subject)
4488               {
4489               SCHECK_PARTIAL();
4490               break;
4491               }
4492             switch(*Feptr)
4493               {
4494               default: Feptr++; break;
4495               HSPACE_BYTE_CASES:
4496 #if PCRE2_CODE_UNIT_WIDTH != 8
4497               HSPACE_MULTIBYTE_CASES:
4498 #endif
4499               goto ENDLOOP00;
4500               }
4501             }
4502           ENDLOOP00:
4503           break;
4504 
4505           case OP_HSPACE:
4506           for (i = Lmin; i < Lmax; i++)
4507             {
4508             if (Feptr >= mb->end_subject)
4509               {
4510               SCHECK_PARTIAL();
4511               break;
4512               }
4513             switch(*Feptr)
4514               {
4515               default: goto ENDLOOP01;
4516               HSPACE_BYTE_CASES:
4517 #if PCRE2_CODE_UNIT_WIDTH != 8
4518               HSPACE_MULTIBYTE_CASES:
4519 #endif
4520               Feptr++; break;
4521               }
4522             }
4523           ENDLOOP01:
4524           break;
4525 
4526           case OP_NOT_VSPACE:
4527           for (i = Lmin; i < Lmax; i++)
4528             {
4529             if (Feptr >= mb->end_subject)
4530               {
4531               SCHECK_PARTIAL();
4532               break;
4533               }
4534             switch(*Feptr)
4535               {
4536               default: Feptr++; break;
4537               VSPACE_BYTE_CASES:
4538 #if PCRE2_CODE_UNIT_WIDTH != 8
4539               VSPACE_MULTIBYTE_CASES:
4540 #endif
4541               goto ENDLOOP02;
4542               }
4543             }
4544           ENDLOOP02:
4545           break;
4546 
4547           case OP_VSPACE:
4548           for (i = Lmin; i < Lmax; i++)
4549             {
4550             if (Feptr >= mb->end_subject)
4551               {
4552               SCHECK_PARTIAL();
4553               break;
4554               }
4555             switch(*Feptr)
4556               {
4557               default: goto ENDLOOP03;
4558               VSPACE_BYTE_CASES:
4559 #if PCRE2_CODE_UNIT_WIDTH != 8
4560               VSPACE_MULTIBYTE_CASES:
4561 #endif
4562               Feptr++; break;
4563               }
4564             }
4565           ENDLOOP03:
4566           break;
4567 
4568           case OP_NOT_DIGIT:
4569           for (i = Lmin; i < Lmax; i++)
4570             {
4571             if (Feptr >= mb->end_subject)
4572               {
4573               SCHECK_PARTIAL();
4574               break;
4575               }
4576             if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
4577               break;
4578             Feptr++;
4579             }
4580           break;
4581 
4582           case OP_DIGIT:
4583           for (i = Lmin; i < Lmax; i++)
4584             {
4585             if (Feptr >= mb->end_subject)
4586               {
4587               SCHECK_PARTIAL();
4588               break;
4589               }
4590             if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
4591               break;
4592             Feptr++;
4593             }
4594           break;
4595 
4596           case OP_NOT_WHITESPACE:
4597           for (i = Lmin; i < Lmax; i++)
4598             {
4599             if (Feptr >= mb->end_subject)
4600               {
4601               SCHECK_PARTIAL();
4602               break;
4603               }
4604             if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
4605               break;
4606             Feptr++;
4607             }
4608           break;
4609 
4610           case OP_WHITESPACE:
4611           for (i = Lmin; i < Lmax; i++)
4612             {
4613             if (Feptr >= mb->end_subject)
4614               {
4615               SCHECK_PARTIAL();
4616               break;
4617               }
4618             if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
4619               break;
4620             Feptr++;
4621             }
4622           break;
4623 
4624           case OP_NOT_WORDCHAR:
4625           for (i = Lmin; i < Lmax; i++)
4626             {
4627             if (Feptr >= mb->end_subject)
4628               {
4629               SCHECK_PARTIAL();
4630               break;
4631               }
4632             if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
4633               break;
4634             Feptr++;
4635             }
4636           break;
4637 
4638           case OP_WORDCHAR:
4639           for (i = Lmin; i < Lmax; i++)
4640             {
4641             if (Feptr >= mb->end_subject)
4642               {
4643               SCHECK_PARTIAL();
4644               break;
4645               }
4646             if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
4647               break;
4648             Feptr++;
4649             }
4650           break;
4651 
4652           default:
4653           return PCRE2_ERROR_INTERNAL;
4654           }
4655 
4656         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4657 
4658         for (;;)
4659           {
4660           if (Feptr == Lstart_eptr) break;
4661           RMATCH(Fecode, RM34);
4662           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4663           Feptr--;
4664           if (Lctype == OP_ANYNL && Feptr > Lstart_eptr && *Feptr == CHAR_LF &&
4665               Feptr[-1] == CHAR_CR) Feptr--;
4666           }
4667         }
4668       }
4669     break;  /* End of repeat character type processing */
4670 
4671 #undef Lstart_eptr
4672 #undef Lmin
4673 #undef Lmax
4674 #undef Lctype
4675 #undef Lpropvalue
4676 
4677 
4678     /* ===================================================================== */
4679     /* Match a back reference, possibly repeatedly. Look past the end of the
4680     item to see if there is repeat information following. The OP_REF and
4681     OP_REFI opcodes are used for a reference to a numbered group or to a
4682     non-duplicated named group. For a duplicated named group, OP_DNREF and
4683     OP_DNREFI are used. In this case we must scan the list of groups to which
4684     the name refers, and use the first one that is set. */
4685 
4686 #define Lmin      F->temp_32[0]
4687 #define Lmax      F->temp_32[1]
4688 #define Lcaseless F->temp_32[2]
4689 #define Lstart    F->temp_sptr[0]
4690 #define Loffset   F->temp_size
4691 
4692     case OP_DNREF:
4693     case OP_DNREFI:
4694     Lcaseless = (Fop == OP_DNREFI);
4695       {
4696       int count = GET2(Fecode, 1+IMM2_SIZE);
4697       PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
4698       Fecode += 1 + 2*IMM2_SIZE;
4699 
4700       while (count-- > 0)
4701         {
4702         Loffset = (GET2(slot, 0) << 1) - 2;
4703         if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET) break;
4704         slot += mb->name_entry_size;
4705         }
4706       }
4707     goto REF_REPEAT;
4708 
4709     case OP_REF:
4710     case OP_REFI:
4711     Lcaseless = (Fop == OP_REFI);
4712     Loffset = (GET2(Fecode, 1) << 1) - 2;
4713     Fecode += 1 + IMM2_SIZE;
4714 
4715     /* Set up for repetition, or handle the non-repeated case. The maximum and
4716     minimum must be in the heap frame, but as they are short-term values, we
4717     use temporary fields. */
4718 
4719     REF_REPEAT:
4720     switch (*Fecode)
4721       {
4722       case OP_CRSTAR:
4723       case OP_CRMINSTAR:
4724       case OP_CRPLUS:
4725       case OP_CRMINPLUS:
4726       case OP_CRQUERY:
4727       case OP_CRMINQUERY:
4728       fc = *Fecode++ - OP_CRSTAR;
4729       Lmin = rep_min[fc];
4730       Lmax = rep_max[fc];
4731       reptype = rep_typ[fc];
4732       break;
4733 
4734       case OP_CRRANGE:
4735       case OP_CRMINRANGE:
4736       Lmin = GET2(Fecode, 1);
4737       Lmax = GET2(Fecode, 1 + IMM2_SIZE);
4738       reptype = rep_typ[*Fecode - OP_CRSTAR];
4739       if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
4740       Fecode += 1 + 2 * IMM2_SIZE;
4741       break;
4742 
4743       default:                  /* No repeat follows */
4744         {
4745         rrc = match_ref(Loffset, Lcaseless, F, mb, &length);
4746         if (rrc != 0)
4747           {
4748           if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
4749           CHECK_PARTIAL();
4750           RRETURN(MATCH_NOMATCH);
4751           }
4752         }
4753       Feptr += length;
4754       continue;              /* With the main loop */
4755       }
4756 
4757     /* Handle repeated back references. If a set group has length zero, just
4758     continue with the main loop, because it matches however many times. For an
4759     unset reference, if the minimum is zero, we can also just continue. We can
4760     also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes unset
4761     groups behave as zero-length groups. For any other unset cases, carrying
4762     on will result in NOMATCH. */
4763 
4764     if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET)
4765       {
4766       if (Fovector[Loffset] == Fovector[Loffset + 1]) continue;
4767       }
4768     else  /* Group is not set */
4769       {
4770       if (Lmin == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
4771         continue;
4772       }
4773 
4774     /* First, ensure the minimum number of matches are present. */
4775 
4776     for (i = 1; i <= Lmin; i++)
4777       {
4778       PCRE2_SIZE slength;
4779       rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
4780       if (rrc != 0)
4781         {
4782         if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
4783         CHECK_PARTIAL();
4784         RRETURN(MATCH_NOMATCH);
4785         }
4786       Feptr += slength;
4787       }
4788 
4789     /* If min = max, we are done. They are not both allowed to be zero. */
4790 
4791     if (Lmin == Lmax) continue;
4792 
4793     /* If minimizing, keep trying and advancing the pointer. */
4794 
4795     if (reptype == REPTYPE_MIN)
4796       {
4797       for (;;)
4798         {
4799         PCRE2_SIZE slength;
4800         RMATCH(Fecode, RM20);
4801         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4802         if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
4803         rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
4804         if (rrc != 0)
4805           {
4806           if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
4807           CHECK_PARTIAL();
4808           RRETURN(MATCH_NOMATCH);
4809           }
4810         Feptr += slength;
4811         }
4812       /* Control never gets here */
4813       }
4814 
4815     /* If maximizing, find the longest string and work backwards, as long as
4816     the matched lengths for each iteration are the same. */
4817 
4818     else
4819       {
4820       BOOL samelengths = TRUE;
4821       Lstart = Feptr;     /* Starting position */
4822       Flength = Fovector[Loffset+1] - Fovector[Loffset];
4823 
4824       for (i = Lmin; i < Lmax; i++)
4825         {
4826         PCRE2_SIZE slength;
4827         rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
4828         if (rrc != 0)
4829           {
4830           /* Can't use CHECK_PARTIAL because we don't want to update Feptr in
4831           the soft partial matching case. */
4832 
4833           if (rrc > 0 && mb->partial != 0 &&
4834               mb->end_subject > mb->start_used_ptr)
4835             {
4836             mb->hitend = TRUE;
4837             if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4838             }
4839           break;
4840           }
4841 
4842         if (slength != Flength) samelengths = FALSE;
4843         Feptr += slength;
4844         }
4845 
4846       /* If the length matched for each repetition is the same as the length of
4847       the captured group, we can easily work backwards. This is the normal
4848       case. However, in caseless UTF-8 mode there are pairs of case-equivalent
4849       characters whose lengths (in terms of code units) differ. Because this
4850       is very rare, we handle it by re-matching fewer and fewer times. */
4851 
4852       if (samelengths)
4853         {
4854         while (Feptr >= Lstart)
4855           {
4856           RMATCH(Fecode, RM21);
4857           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4858           Feptr -= Flength;
4859           }
4860         }
4861 
4862       /* The rare case of non-matching lengths. Re-scan the repetition for each
4863       iteration. We know that match_ref() will succeed every time. */
4864 
4865       else
4866         {
4867         Lmax = i;
4868         for (;;)
4869           {
4870           RMATCH(Fecode, RM22);
4871           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4872           if (Feptr == Lstart) break; /* Failed after minimal repetition */
4873           Feptr = Lstart;
4874           Lmax--;
4875           for (i = Lmin; i < Lmax; i++)
4876             {
4877             PCRE2_SIZE slength;
4878             (void)match_ref(Loffset, Lcaseless, F, mb, &slength);
4879             Feptr += slength;
4880             }
4881           }
4882         }
4883 
4884       RRETURN(MATCH_NOMATCH);
4885       }
4886     /* Control never gets here */
4887 
4888 #undef Lcaseless
4889 #undef Lmin
4890 #undef Lmax
4891 #undef Lstart
4892 #undef Loffset
4893 
4894 
4895 
4896 /* ========================================================================= */
4897 /*           Opcodes for the start of various parenthesized items            */
4898 /* ========================================================================= */
4899 
4900     /* In all cases, if the result of RMATCH() is MATCH_THEN, check whether the
4901     (*THEN) is within the current branch by comparing the address of OP_THEN
4902     that is passed back with the end of the branch. If (*THEN) is within the
4903     current branch, and the branch is one of two or more alternatives (it
4904     either starts or ends with OP_ALT), we have reached the limit of THEN's
4905     action, so convert the return code to NOMATCH, which will cause normal
4906     backtracking to happen from now on. Otherwise, THEN is passed back to an
4907     outer alternative. This implements Perl's treatment of parenthesized
4908     groups, where a group not containing | does not affect the current
4909     alternative, that is, (X) is NOT the same as (X|(*F)). */
4910 
4911 
4912     /* ===================================================================== */
4913     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a non-possessive
4914     bracket group, indicating that it may occur zero times. It may repeat
4915     infinitely, or not at all - i.e. it could be ()* or ()? or even (){0} in
4916     the pattern. Brackets with fixed upper repeat limits are compiled as a
4917     number of copies, with the optional ones preceded by BRAZERO or BRAMINZERO.
4918     Possessive groups with possible zero repeats are preceded by BRAPOSZERO. */
4919 
4920 #define Lnext_ecode F->temp_sptr[0]
4921 
4922     case OP_BRAZERO:
4923     Lnext_ecode = Fecode + 1;
4924     RMATCH(Lnext_ecode, RM9);
4925     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4926     do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
4927     Fecode = Lnext_ecode + 1 + LINK_SIZE;
4928     break;
4929 
4930     case OP_BRAMINZERO:
4931     Lnext_ecode = Fecode + 1;
4932     do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
4933     RMATCH(Lnext_ecode + 1 + LINK_SIZE, RM10);
4934     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4935     Fecode++;
4936     break;
4937 
4938 #undef Lnext_ecode
4939 
4940     case OP_SKIPZERO:
4941     Fecode++;
4942     do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
4943     Fecode += 1 + LINK_SIZE;
4944     break;
4945 
4946 
4947     /* ===================================================================== */
4948     /* Handle possessive brackets with an unlimited repeat. The end of these
4949     brackets will always be OP_KETRPOS, which returns MATCH_KETRPOS without
4950     going further in the pattern. */
4951 
4952 #define Lframe_type    F->temp_32[0]
4953 #define Lmatched_once  F->temp_32[1]
4954 #define Lzero_allowed  F->temp_32[2]
4955 #define Lstart_eptr    F->temp_sptr[0]
4956 #define Lstart_group   F->temp_sptr[1]
4957 
4958     case OP_BRAPOSZERO:
4959     Lzero_allowed = TRUE;                /* Zero repeat is allowed */
4960     Fecode += 1;
4961     if (*Fecode == OP_CBRAPOS || *Fecode == OP_SCBRAPOS)
4962       goto POSSESSIVE_CAPTURE;
4963     goto POSSESSIVE_NON_CAPTURE;
4964 
4965     case OP_BRAPOS:
4966     case OP_SBRAPOS:
4967     Lzero_allowed = FALSE;               /* Zero repeat not allowed */
4968 
4969     POSSESSIVE_NON_CAPTURE:
4970     Lframe_type = GF_NOCAPTURE;          /* Remembered frame type */
4971     goto POSSESSIVE_GROUP;
4972 
4973     case OP_CBRAPOS:
4974     case OP_SCBRAPOS:
4975     Lzero_allowed = FALSE;               /* Zero repeat not allowed */
4976 
4977     POSSESSIVE_CAPTURE:
4978     number = GET2(Fecode, 1+LINK_SIZE);
4979     Lframe_type = GF_CAPTURE | number;   /* Remembered frame type */
4980 
4981     POSSESSIVE_GROUP:
4982     Lmatched_once = FALSE;               /* Never matched */
4983     Lstart_group = Fecode;               /* Start of this group */
4984 
4985     for (;;)
4986       {
4987       Lstart_eptr = Feptr;               /* Position at group start */
4988       group_frame_type = Lframe_type;
4989       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM8);
4990       if (rrc == MATCH_KETRPOS)
4991         {
4992         Lmatched_once = TRUE;            /* Matched at least once */
4993         if (Feptr == Lstart_eptr)        /* Empty match; skip to end */
4994           {
4995           do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
4996           break;
4997           }
4998 
4999         Fecode = Lstart_group;
5000         continue;
5001         }
5002 
5003       /* See comment above about handling THEN. */
5004 
5005       if (rrc == MATCH_THEN)
5006         {
5007         PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
5008         if (mb->verb_ecode_ptr < next_ecode &&
5009             (*Fecode == OP_ALT || *next_ecode == OP_ALT))
5010           rrc = MATCH_NOMATCH;
5011         }
5012 
5013       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5014       Fecode += GET(Fecode, 1);
5015       if (*Fecode != OP_ALT) break;
5016       }
5017 
5018     /* Success if matched something or zero repeat allowed */
5019 
5020     if (Lmatched_once || Lzero_allowed)
5021       {
5022       Fecode += 1 + LINK_SIZE;
5023       break;
5024       }
5025 
5026     RRETURN(MATCH_NOMATCH);
5027 
5028 #undef Lmatched_once
5029 #undef Lzero_allowed
5030 #undef Lframe_type
5031 #undef Lstart_eptr
5032 #undef Lstart_group
5033 
5034 
5035     /* ===================================================================== */
5036     /* Handle non-capturing brackets that cannot match an empty string. When we
5037     get to the final alternative within the brackets, as long as there are no
5038     THEN's in the pattern, we can optimize by not recording a new backtracking
5039     point. (Ideally we should test for a THEN within this group, but we don't
5040     have that information.) Don't do this if we are at the very top level,
5041     however, because that would make handling assertions and once-only brackets
5042     messier when there is nothing to go back to. */
5043 
5044 #define Lframe_type F->temp_32[0]     /* Set for all that use GROUPLOOP */
5045 #define Lnext_branch F->temp_sptr[0]  /* Used only in OP_BRA handling */
5046 
5047     case OP_BRA:
5048     if (mb->hasthen || Frdepth == 0)
5049       {
5050       Lframe_type = 0;
5051       goto GROUPLOOP;
5052       }
5053 
5054     for (;;)
5055       {
5056       Lnext_branch = Fecode + GET(Fecode, 1);
5057       if (*Lnext_branch != OP_ALT) break;
5058 
5059       /* This is never the final branch. We do not need to test for MATCH_THEN
5060       here because this code is not used when there is a THEN in the pattern. */
5061 
5062       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM1);
5063       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5064       Fecode = Lnext_branch;
5065       }
5066 
5067     /* Hit the start of the final branch. Continue at this level. */
5068 
5069     Fecode += PRIV(OP_lengths)[*Fecode];
5070     break;
5071 
5072 #undef Lnext_branch
5073 
5074 
5075     /* ===================================================================== */
5076     /* Handle a capturing bracket, other than those that are possessive with an
5077     unlimited repeat. */
5078 
5079     case OP_CBRA:
5080     case OP_SCBRA:
5081     Lframe_type = GF_CAPTURE | GET2(Fecode, 1+LINK_SIZE);
5082     goto GROUPLOOP;
5083 
5084 
5085     /* ===================================================================== */
5086     /* Atomic groups and non-capturing brackets that can match an empty string
5087     must record a backtracking point and also set up a chained frame. */
5088 
5089     case OP_ONCE:
5090     case OP_SCRIPT_RUN:
5091     case OP_SBRA:
5092     Lframe_type = GF_NOCAPTURE | Fop;
5093 
5094     GROUPLOOP:
5095     for (;;)
5096       {
5097       group_frame_type = Lframe_type;
5098       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM2);
5099       if (rrc == MATCH_THEN)
5100         {
5101         PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
5102         if (mb->verb_ecode_ptr < next_ecode &&
5103             (*Fecode == OP_ALT || *next_ecode == OP_ALT))
5104           rrc = MATCH_NOMATCH;
5105         }
5106       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5107       Fecode += GET(Fecode, 1);
5108       if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5109       }
5110     /* Control never reaches here. */
5111 
5112 #undef Lframe_type
5113 
5114 
5115     /* ===================================================================== */
5116     /* Recursion either matches the current regex, or some subexpression. The
5117     offset data is the offset to the starting bracket from the start of the
5118     whole pattern. (This is so that it works from duplicated subpatterns.) */
5119 
5120 #define Lframe_type F->temp_32[0]
5121 #define Lstart_branch F->temp_sptr[0]
5122 
5123     case OP_RECURSE:
5124     bracode = mb->start_code + GET(Fecode, 1);
5125     number = (bracode == mb->start_code)? 0 : GET2(bracode, 1 + LINK_SIZE);
5126 
5127     /* If we are already in a recursion, check for repeating the same one
5128     without advancing the subject pointer. This should catch convoluted mutual
5129     recursions. (Some simple cases are caught at compile time.) */
5130 
5131     if (Fcurrent_recurse != RECURSE_UNSET)
5132       {
5133       offset = Flast_group_offset;
5134       while (offset != PCRE2_UNSET)
5135         {
5136         N = (heapframe *)((char *)mb->match_frames + offset);
5137         P = (heapframe *)((char *)N - frame_size);
5138         if (N->group_frame_type == (GF_RECURSE | number))
5139           {
5140           if (Feptr == P->eptr) return PCRE2_ERROR_RECURSELOOP;
5141           break;
5142           }
5143         offset = P->last_group_offset;
5144         }
5145       }
5146 
5147     /* Now run the recursion, branch by branch. */
5148 
5149     Lstart_branch = bracode;
5150     Lframe_type = GF_RECURSE | number;
5151 
5152     for (;;)
5153       {
5154       PCRE2_SPTR next_ecode;
5155 
5156       group_frame_type = Lframe_type;
5157       RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM11);
5158       next_ecode = Lstart_branch + GET(Lstart_branch,1);
5159 
5160       /* Handle backtracking verbs, which are defined in a range that can
5161       easily be tested for. PCRE does not allow THEN, SKIP, PRUNE or COMMIT to
5162       escape beyond a recursion; they cause a NOMATCH for the entire recursion.
5163 
5164       When one of these verbs triggers, the current recursion group number is
5165       recorded. If it matches the recursion we are processing, the verb
5166       happened within the recursion and we must deal with it. Otherwise it must
5167       have happened after the recursion completed, and so has to be passed
5168       back. See comment above about handling THEN. */
5169 
5170       if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX &&
5171           mb->verb_current_recurse == (Lframe_type ^ GF_RECURSE))
5172         {
5173         if (rrc == MATCH_THEN && mb->verb_ecode_ptr < next_ecode &&
5174             (*Lstart_branch == OP_ALT || *next_ecode == OP_ALT))
5175           rrc = MATCH_NOMATCH;
5176         else RRETURN(MATCH_NOMATCH);
5177         }
5178 
5179       /* Note that carrying on after (*ACCEPT) in a recursion is handled in the
5180       OP_ACCEPT code. Nothing needs to be done here. */
5181 
5182       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5183       Lstart_branch = next_ecode;
5184       if (*Lstart_branch != OP_ALT) RRETURN(MATCH_NOMATCH);
5185       }
5186     /* Control never reaches here. */
5187 
5188 #undef Lframe_type
5189 #undef Lstart_branch
5190 
5191 
5192     /* ===================================================================== */
5193     /* Positive assertions are like other groups except that PCRE doesn't allow
5194     the effect of (*THEN) to escape beyond an assertion; it is therefore
5195     treated as NOMATCH. (*ACCEPT) is treated as a successful assertion, with
5196     its captures and mark retained. Any other return is an error. */
5197 
5198 #define Lframe_type  F->temp_32[0]
5199 
5200     case OP_ASSERT:
5201     case OP_ASSERTBACK:
5202     case OP_ASSERT_NA:
5203     case OP_ASSERTBACK_NA:
5204     Lframe_type = GF_NOCAPTURE | Fop;
5205     for (;;)
5206       {
5207       group_frame_type = Lframe_type;
5208       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM3);
5209       if (rrc == MATCH_ACCEPT)
5210         {
5211         memcpy(Fovector,
5212               (char *)assert_accept_frame + offsetof(heapframe, ovector),
5213               assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5214         Foffset_top = assert_accept_frame->offset_top;
5215         Fmark = assert_accept_frame->mark;
5216         break;
5217         }
5218       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
5219       Fecode += GET(Fecode, 1);
5220       if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5221       }
5222 
5223     do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5224     Fecode += 1 + LINK_SIZE;
5225     break;
5226 
5227 #undef Lframe_type
5228 
5229 
5230     /* ===================================================================== */
5231     /* Handle negative assertions. Loop for each non-matching branch as for
5232     positive assertions. */
5233 
5234 #define Lframe_type  F->temp_32[0]
5235 
5236     case OP_ASSERT_NOT:
5237     case OP_ASSERTBACK_NOT:
5238     Lframe_type  = GF_NOCAPTURE | Fop;
5239 
5240     for (;;)
5241       {
5242       group_frame_type = Lframe_type;
5243       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM4);
5244       switch(rrc)
5245         {
5246         case MATCH_ACCEPT:   /* Assertion matched, therefore it fails. */
5247         case MATCH_MATCH:
5248         RRETURN (MATCH_NOMATCH);
5249 
5250         case MATCH_NOMATCH:  /* Branch failed, try next if present. */
5251         case MATCH_THEN:
5252         Fecode += GET(Fecode, 1);
5253         if (*Fecode != OP_ALT) goto ASSERT_NOT_FAILED;
5254         break;
5255 
5256         case MATCH_COMMIT:   /* Assertion forced to fail, therefore continue. */
5257         case MATCH_SKIP:
5258         case MATCH_PRUNE:
5259         do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5260         goto ASSERT_NOT_FAILED;
5261 
5262         default:             /* Pass back any other return */
5263         RRETURN(rrc);
5264         }
5265       }
5266 
5267     /* None of the branches have matched or there was a backtrack to (*COMMIT),
5268     (*SKIP), (*PRUNE), or (*THEN) in the last branch. This is success for a
5269     negative assertion, so carry on. */
5270 
5271     ASSERT_NOT_FAILED:
5272     Fecode += 1 + LINK_SIZE;
5273     break;
5274 
5275 #undef Lframe_type
5276 
5277 
5278     /* ===================================================================== */
5279     /* The callout item calls an external function, if one is provided, passing
5280     details of the match so far. This is mainly for debugging, though the
5281     function is able to force a failure. */
5282 
5283     case OP_CALLOUT:
5284     case OP_CALLOUT_STR:
5285     rrc = do_callout(F, mb, &length);
5286     if (rrc > 0) RRETURN(MATCH_NOMATCH);
5287     if (rrc < 0) RRETURN(rrc);
5288     Fecode += length;
5289     break;
5290 
5291 
5292     /* ===================================================================== */
5293     /* Conditional group: compilation checked that there are no more than two
5294     branches. If the condition is false, skipping the first branch takes us
5295     past the end of the item if there is only one branch, but that's exactly
5296     what we want. */
5297 
5298     case OP_COND:
5299     case OP_SCOND:
5300 
5301     /* The variable Flength will be added to Fecode when the condition is
5302     false, to get to the second branch. Setting it to the offset to the ALT or
5303     KET, then incrementing Fecode achieves this effect. However, if the second
5304     branch is non-existent, we must point to the KET so that the end of the
5305     group is correctly processed. We now have Fecode pointing to the condition
5306     or callout. */
5307 
5308     Flength = GET(Fecode, 1);    /* Offset to the second branch */
5309     if (Fecode[Flength] != OP_ALT) Flength -= 1 + LINK_SIZE;
5310     Fecode += 1 + LINK_SIZE;     /* From this opcode */
5311 
5312     /* Because of the way auto-callout works during compile, a callout item is
5313     inserted between OP_COND and an assertion condition. Such a callout can
5314     also be inserted manually. */
5315 
5316     if (*Fecode == OP_CALLOUT || *Fecode == OP_CALLOUT_STR)
5317       {
5318       rrc = do_callout(F, mb, &length);
5319       if (rrc > 0) RRETURN(MATCH_NOMATCH);
5320       if (rrc < 0) RRETURN(rrc);
5321 
5322       /* Advance Fecode past the callout, so it now points to the condition. We
5323       must adjust Flength so that the value of Fecode+Flength is unchanged. */
5324 
5325       Fecode += length;
5326       Flength -= length;
5327       }
5328 
5329     /* Test the various possible conditions */
5330 
5331     condition = FALSE;
5332     switch(*Fecode)
5333       {
5334       case OP_RREF:                  /* Group recursion test */
5335       if (Fcurrent_recurse != RECURSE_UNSET)
5336         {
5337         number = GET2(Fecode, 1);
5338         condition = (number == RREF_ANY || number == Fcurrent_recurse);
5339         }
5340       break;
5341 
5342       case OP_DNRREF:       /* Duplicate named group recursion test */
5343       if (Fcurrent_recurse != RECURSE_UNSET)
5344         {
5345         int count = GET2(Fecode, 1 + IMM2_SIZE);
5346         PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5347         while (count-- > 0)
5348           {
5349           number = GET2(slot, 0);
5350           condition = number == Fcurrent_recurse;
5351           if (condition) break;
5352           slot += mb->name_entry_size;
5353           }
5354         }
5355       break;
5356 
5357       case OP_CREF:                         /* Numbered group used test */
5358       offset = (GET2(Fecode, 1) << 1) - 2;  /* Doubled ref number */
5359       condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
5360       break;
5361 
5362       case OP_DNCREF:      /* Duplicate named group used test */
5363         {
5364         int count = GET2(Fecode, 1 + IMM2_SIZE);
5365         PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5366         while (count-- > 0)
5367           {
5368           offset = (GET2(slot, 0) << 1) - 2;
5369           condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
5370           if (condition) break;
5371           slot += mb->name_entry_size;
5372           }
5373         }
5374       break;
5375 
5376       case OP_FALSE:
5377       case OP_FAIL:   /* The assertion (?!) becomes OP_FAIL */
5378       break;
5379 
5380       case OP_TRUE:
5381       condition = TRUE;
5382       break;
5383 
5384       /* The condition is an assertion. Run code similar to the assertion code
5385       above. */
5386 
5387 #define Lpositive      F->temp_32[0]
5388 #define Lstart_branch  F->temp_sptr[0]
5389 
5390       default:
5391       Lpositive = (*Fecode == OP_ASSERT || *Fecode == OP_ASSERTBACK);
5392       Lstart_branch = Fecode;
5393 
5394       for (;;)
5395         {
5396         group_frame_type = GF_CONDASSERT | *Fecode;
5397         RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM5);
5398 
5399         switch(rrc)
5400           {
5401           case MATCH_ACCEPT:  /* Save captures */
5402           memcpy(Fovector,
5403                 (char *)assert_accept_frame + offsetof(heapframe, ovector),
5404                 assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5405           Foffset_top = assert_accept_frame->offset_top;
5406 
5407           /* Fall through */
5408           /* In the case of a match, the captures have already been put into
5409           the current frame. */
5410 
5411           case MATCH_MATCH:
5412           condition = Lpositive;   /* TRUE for positive assertion */
5413           break;
5414 
5415           /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
5416           assertion; it is therefore always treated as NOMATCH. */
5417 
5418           case MATCH_NOMATCH:
5419           case MATCH_THEN:
5420           Lstart_branch += GET(Lstart_branch, 1);
5421           if (*Lstart_branch == OP_ALT) continue;  /* Try next branch */
5422           condition = !Lpositive;  /* TRUE for negative assertion */
5423           break;
5424 
5425           /* These force no match without checking other branches. */
5426 
5427           case MATCH_COMMIT:
5428           case MATCH_SKIP:
5429           case MATCH_PRUNE:
5430           condition = !Lpositive;
5431           break;
5432 
5433           default:
5434           RRETURN(rrc);
5435           }
5436         break;  /* Out of the branch loop */
5437         }
5438 
5439       /* If the condition is true, find the end of the assertion so that
5440       advancing past it gets us to the start of the first branch. */
5441 
5442       if (condition)
5443         {
5444         do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5445         }
5446       break;  /* End of assertion condition */
5447       }
5448 
5449 #undef Lpositive
5450 #undef Lstart_branch
5451 
5452     /* Choose branch according to the condition. */
5453 
5454     Fecode += condition? PRIV(OP_lengths)[*Fecode] : Flength;
5455 
5456     /* If the opcode is OP_SCOND it means we are at a repeated conditional
5457     group that might match an empty string. We must therefore descend a level
5458     so that the start is remembered for checking. For OP_COND we can just
5459     continue at this level. */
5460 
5461     if (Fop == OP_SCOND)
5462       {
5463       group_frame_type  = GF_NOCAPTURE | Fop;
5464       RMATCH(Fecode, RM35);
5465       RRETURN(rrc);
5466       }
5467     break;
5468 
5469 
5470 
5471 /* ========================================================================= */
5472 /*                  End of start of parenthesis opcodes                      */
5473 /* ========================================================================= */
5474 
5475 
5476     /* ===================================================================== */
5477     /* Move the subject pointer back. This occurs only at the start of each
5478     branch of a lookbehind assertion. If we are too close to the start to move
5479     back, fail. When working with UTF-8 we move back a number of characters,
5480     not bytes. */
5481 
5482     case OP_REVERSE:
5483     number = GET(Fecode, 1);
5484 #ifdef SUPPORT_UNICODE
5485     if (utf)
5486       {
5487       while (number-- > 0)
5488         {
5489         if (Feptr <= mb->check_subject) RRETURN(MATCH_NOMATCH);
5490         Feptr--;
5491         BACKCHAR(Feptr);
5492         }
5493       }
5494     else
5495 #endif
5496 
5497     /* No UTF-8 support, or not in UTF-8 mode: count is code unit count */
5498 
5499       {
5500       if ((ptrdiff_t)number > Feptr - mb->start_subject) RRETURN(MATCH_NOMATCH);
5501       Feptr -= number;
5502       }
5503 
5504     /* Save the earliest consulted character, then skip to next opcode */
5505 
5506     if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr;
5507     Fecode += 1 + LINK_SIZE;
5508     break;
5509 
5510 
5511     /* ===================================================================== */
5512     /* An alternation is the end of a branch; scan along to find the end of the
5513     bracketed group. */
5514 
5515     case OP_ALT:
5516     do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
5517     break;
5518 
5519 
5520     /* ===================================================================== */
5521     /* The end of a parenthesized group. For all but OP_BRA and OP_COND, the
5522     starting frame was added to the chained frames in order to remember the
5523     starting subject position for the group. */
5524 
5525     case OP_KET:
5526     case OP_KETRMIN:
5527     case OP_KETRMAX:
5528     case OP_KETRPOS:
5529 
5530     bracode = Fecode - GET(Fecode, 1);
5531 
5532     /* Point N to the frame at the start of the most recent group.
5533     Remember the subject pointer at the start of the group. */
5534 
5535     if (*bracode != OP_BRA && *bracode != OP_COND)
5536       {
5537       N = (heapframe *)((char *)mb->match_frames + Flast_group_offset);
5538       P = (heapframe *)((char *)N - frame_size);
5539       Flast_group_offset = P->last_group_offset;
5540 
5541 #ifdef DEBUG_SHOW_RMATCH
5542       fprintf(stderr, "++ KET for frame=%d type=%x prev char offset=%lu\n",
5543         N->rdepth, N->group_frame_type,
5544         (char *)P->eptr - (char *)mb->start_subject);
5545 #endif
5546 
5547       /* If we are at the end of an assertion that is a condition, return a
5548       match, discarding any intermediate backtracking points. Copy back the
5549       mark setting and the captures into the frame before N so that they are
5550       set on return. Doing this for all assertions, both positive and negative,
5551       seems to match what Perl does. */
5552 
5553       if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT)
5554         {
5555         memcpy((char *)P + offsetof(heapframe, ovector), Fovector,
5556           Foffset_top * sizeof(PCRE2_SIZE));
5557         P->offset_top = Foffset_top;
5558         P->mark = Fmark;
5559         Fback_frame = (char *)F - (char *)P;
5560         RRETURN(MATCH_MATCH);
5561         }
5562       }
5563     else P = NULL;   /* Indicates starting frame not recorded */
5564 
5565     /* The group was not a conditional assertion. */
5566 
5567     switch (*bracode)
5568       {
5569       case OP_BRA:    /* No need to do anything for these */
5570       case OP_COND:
5571       case OP_SCOND:
5572       break;
5573 
5574       /* Non-atomic positive assertions are like OP_BRA, except that the
5575       subject pointer must be put back to where it was at the start of the
5576       assertion. */
5577 
5578       case OP_ASSERT_NA:
5579       case OP_ASSERTBACK_NA:
5580       if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
5581       Feptr = P->eptr;
5582       break;
5583 
5584       /* Atomic positive assertions are like OP_ONCE, except that in addition
5585       the subject pointer must be put back to where it was at the start of the
5586       assertion. */
5587 
5588       case OP_ASSERT:
5589       case OP_ASSERTBACK:
5590       if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
5591       Feptr = P->eptr;
5592       /* Fall through */
5593 
5594       /* For an atomic group, discard internal backtracking points. We must
5595       also ensure that any remaining branches within the top-level of the group
5596       are not tried. Do this by adjusting the code pointer within the backtrack
5597       frame so that it points to the final branch. */
5598 
5599       case OP_ONCE:
5600       Fback_frame = ((char *)F - (char *)P);
5601       for (;;)
5602         {
5603         uint32_t y = GET(P->ecode,1);
5604         if ((P->ecode)[y] != OP_ALT) break;
5605         P->ecode += y;
5606         }
5607       break;
5608 
5609       /* A matching negative assertion returns MATCH, which is turned into
5610       NOMATCH at the assertion level. */
5611 
5612       case OP_ASSERT_NOT:
5613       case OP_ASSERTBACK_NOT:
5614       RRETURN(MATCH_MATCH);
5615 
5616       /* At the end of a script run, apply the script-checking rules. This code
5617       will never be exercised if Unicode support is not compiled, because in
5618       that environment script runs cause an error at compile time. */
5619 
5620       case OP_SCRIPT_RUN:
5621       if (!PRIV(script_run)(P->eptr, Feptr, utf)) RRETURN(MATCH_NOMATCH);
5622       break;
5623 
5624       /* Whole-pattern recursion is coded as a recurse into group 0, so it
5625       won't be picked up here. Instead, we catch it when the OP_END is reached.
5626       Other recursion is handled here. */
5627 
5628       case OP_CBRA:
5629       case OP_CBRAPOS:
5630       case OP_SCBRA:
5631       case OP_SCBRAPOS:
5632       number = GET2(bracode, 1+LINK_SIZE);
5633 
5634       /* Handle a recursively called group. We reinstate the previous set of
5635       captures and then carry on after the recursion call. */
5636 
5637       if (Fcurrent_recurse == number)
5638         {
5639         P = (heapframe *)((char *)N - frame_size);
5640         memcpy((char *)F + offsetof(heapframe, ovector), P->ovector,
5641           P->offset_top * sizeof(PCRE2_SIZE));
5642         Foffset_top = P->offset_top;
5643         Fcapture_last = P->capture_last;
5644         Fcurrent_recurse = P->current_recurse;
5645         Fecode = P->ecode + 1 + LINK_SIZE;
5646         continue;  /* With next opcode */
5647         }
5648 
5649       /* Deal with actual capturing. */
5650 
5651       offset = (number << 1) - 2;
5652       Fcapture_last = number;
5653       Fovector[offset] = P->eptr - mb->start_subject;
5654       Fovector[offset+1] = Feptr - mb->start_subject;
5655       if (offset >= Foffset_top) Foffset_top = offset + 2;
5656       break;
5657       }  /* End actions relating to the starting opcode */
5658 
5659     /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
5660     and return the MATCH_KETRPOS. This makes it possible to do the repeats one
5661     at a time from the outer level. This must precede the empty string test -
5662     in this case that test is done at the outer level. */
5663 
5664     if (*Fecode == OP_KETRPOS)
5665       {
5666       memcpy((char *)P + offsetof(heapframe, eptr),
5667              (char *)F + offsetof(heapframe, eptr),
5668              frame_copy_size);
5669       RRETURN(MATCH_KETRPOS);
5670       }
5671 
5672     /* Handle the different kinds of closing brackets. A non-repeating ket
5673     needs no special action, just continuing at this level. This also happens
5674     for the repeating kets if the group matched no characters, in order to
5675     forcibly break infinite loops. Otherwise, the repeating kets try the rest
5676     of the pattern or restart from the preceding bracket, in the appropriate
5677     order. */
5678 
5679     if (Fop != OP_KET && (P == NULL || Feptr != P->eptr))
5680       {
5681       if (Fop == OP_KETRMIN)
5682         {
5683         RMATCH(Fecode + 1 + LINK_SIZE, RM6);
5684         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5685         Fecode -= GET(Fecode, 1);
5686         break;   /* End of ket processing */
5687         }
5688 
5689       /* Repeat the maximum number of times (KETRMAX) */
5690 
5691       RMATCH(bracode, RM7);
5692       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5693       }
5694 
5695     /* Carry on at this level for a non-repeating ket, or after matching an
5696     empty string, or after repeating for a maximum number of times. */
5697 
5698     Fecode += 1 + LINK_SIZE;
5699     break;
5700 
5701 
5702     /* ===================================================================== */
5703     /* Start and end of line assertions, not multiline mode. */
5704 
5705     case OP_CIRC:   /* Start of line, unless PCRE2_NOTBOL is set. */
5706     if (Feptr != mb->start_subject || (mb->moptions & PCRE2_NOTBOL) != 0)
5707       RRETURN(MATCH_NOMATCH);
5708     Fecode++;
5709     break;
5710 
5711     case OP_SOD:    /* Unconditional start of subject */
5712     if (Feptr != mb->start_subject) RRETURN(MATCH_NOMATCH);
5713     Fecode++;
5714     break;
5715 
5716     /* When PCRE2_NOTEOL is unset, assert before the subject end, or a
5717     terminating newline unless PCRE2_DOLLAR_ENDONLY is set. */
5718 
5719     case OP_DOLL:
5720     if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
5721     if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
5722 
5723     /* Fall through */
5724     /* Unconditional end of subject assertion (\z) */
5725 
5726     case OP_EOD:
5727     if (Feptr < mb->end_subject) RRETURN(MATCH_NOMATCH);
5728     if (mb->partial != 0)
5729       {
5730       mb->hitend = TRUE;
5731       if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5732       }
5733     Fecode++;
5734     break;
5735 
5736     /* End of subject or ending \n assertion (\Z) */
5737 
5738     case OP_EODN:
5739     ASSERT_NL_OR_EOS:
5740     if (Feptr < mb->end_subject &&
5741         (!IS_NEWLINE(Feptr) || Feptr != mb->end_subject - mb->nllen))
5742       {
5743       if (mb->partial != 0 &&
5744           Feptr + 1 >= mb->end_subject &&
5745           NLBLOCK->nltype == NLTYPE_FIXED &&
5746           NLBLOCK->nllen == 2 &&
5747           UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
5748         {
5749         mb->hitend = TRUE;
5750         if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5751         }
5752       RRETURN(MATCH_NOMATCH);
5753       }
5754 
5755     /* Either at end of string or \n before end. */
5756 
5757     if (mb->partial != 0)
5758       {
5759       mb->hitend = TRUE;
5760       if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5761       }
5762     Fecode++;
5763     break;
5764 
5765 
5766     /* ===================================================================== */
5767     /* Start and end of line assertions, multiline mode. */
5768 
5769     /* Start of subject unless notbol, or after any newline except for one at
5770     the very end, unless PCRE2_ALT_CIRCUMFLEX is set. */
5771 
5772     case OP_CIRCM:
5773     if ((mb->moptions & PCRE2_NOTBOL) != 0 && Feptr == mb->start_subject)
5774       RRETURN(MATCH_NOMATCH);
5775     if (Feptr != mb->start_subject &&
5776         ((Feptr == mb->end_subject &&
5777            (mb->poptions & PCRE2_ALT_CIRCUMFLEX) == 0) ||
5778          !WAS_NEWLINE(Feptr)))
5779       RRETURN(MATCH_NOMATCH);
5780     Fecode++;
5781     break;
5782 
5783     /* Assert before any newline, or before end of subject unless noteol is
5784     set. */
5785 
5786     case OP_DOLLM:
5787     if (Feptr < mb->end_subject)
5788       {
5789       if (!IS_NEWLINE(Feptr))
5790         {
5791         if (mb->partial != 0 &&
5792             Feptr + 1 >= mb->end_subject &&
5793             NLBLOCK->nltype == NLTYPE_FIXED &&
5794             NLBLOCK->nllen == 2 &&
5795             UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
5796           {
5797           mb->hitend = TRUE;
5798           if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5799           }
5800         RRETURN(MATCH_NOMATCH);
5801         }
5802       }
5803     else
5804       {
5805       if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
5806       SCHECK_PARTIAL();
5807       }
5808     Fecode++;
5809     break;
5810 
5811 
5812     /* ===================================================================== */
5813     /* Start of match assertion */
5814 
5815     case OP_SOM:
5816     if (Feptr != mb->start_subject + mb->start_offset) RRETURN(MATCH_NOMATCH);
5817     Fecode++;
5818     break;
5819 
5820 
5821     /* ===================================================================== */
5822     /* Reset the start of match point */
5823 
5824     case OP_SET_SOM:
5825     Fstart_match = Feptr;
5826     Fecode++;
5827     break;
5828 
5829 
5830     /* ===================================================================== */
5831     /* Word boundary assertions. Find out if the previous and current
5832     characters are "word" characters. It takes a bit more work in UTF mode.
5833     Characters > 255 are assumed to be "non-word" characters when PCRE2_UCP is
5834     not set. When it is set, use Unicode properties if available, even when not
5835     in UTF mode. Remember the earliest and latest consulted characters. */
5836 
5837     case OP_NOT_WORD_BOUNDARY:
5838     case OP_WORD_BOUNDARY:
5839     if (Feptr == mb->check_subject) prev_is_word = FALSE; else
5840       {
5841       PCRE2_SPTR lastptr = Feptr - 1;
5842 #ifdef SUPPORT_UNICODE
5843       if (utf)
5844         {
5845         BACKCHAR(lastptr);
5846         GETCHAR(fc, lastptr);
5847         }
5848       else
5849 #endif  /* SUPPORT_UNICODE */
5850       fc = *lastptr;
5851       if (lastptr < mb->start_used_ptr) mb->start_used_ptr = lastptr;
5852 #ifdef SUPPORT_UNICODE
5853       if ((mb->poptions & PCRE2_UCP) != 0)
5854         {
5855         if (fc == '_') prev_is_word = TRUE; else
5856           {
5857           int cat = UCD_CATEGORY(fc);
5858           prev_is_word = (cat == ucp_L || cat == ucp_N);
5859           }
5860         }
5861       else
5862 #endif  /* SUPPORT_UNICODE */
5863       prev_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
5864       }
5865 
5866     /* Get status of next character */
5867 
5868     if (Feptr >= mb->end_subject)
5869       {
5870       SCHECK_PARTIAL();
5871       cur_is_word = FALSE;
5872       }
5873     else
5874       {
5875       PCRE2_SPTR nextptr = Feptr + 1;
5876 #ifdef SUPPORT_UNICODE
5877       if (utf)
5878         {
5879         FORWARDCHARTEST(nextptr, mb->end_subject);
5880         GETCHAR(fc, Feptr);
5881         }
5882       else
5883 #endif  /* SUPPORT_UNICODE */
5884       fc = *Feptr;
5885       if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
5886 #ifdef SUPPORT_UNICODE
5887       if ((mb->poptions & PCRE2_UCP) != 0)
5888         {
5889         if (fc == '_') cur_is_word = TRUE; else
5890           {
5891           int cat = UCD_CATEGORY(fc);
5892           cur_is_word = (cat == ucp_L || cat == ucp_N);
5893           }
5894         }
5895       else
5896 #endif  /* SUPPORT_UNICODE */
5897       cur_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
5898       }
5899 
5900     /* Now see if the situation is what we want */
5901 
5902     if ((*Fecode++ == OP_WORD_BOUNDARY)?
5903          cur_is_word == prev_is_word : cur_is_word != prev_is_word)
5904       RRETURN(MATCH_NOMATCH);
5905     break;
5906 
5907 
5908     /* ===================================================================== */
5909     /* Backtracking (*VERB)s, with and without arguments. Note that if the
5910     pattern is successfully matched, we do not come back from RMATCH. */
5911 
5912     case OP_MARK:
5913     Fmark = mb->nomatch_mark = Fecode + 2;
5914     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM12);
5915 
5916     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
5917     argument, and we must check whether that argument matches this MARK's
5918     argument. It is passed back in mb->verb_skip_ptr. If it does match, we
5919     return MATCH_SKIP with mb->verb_skip_ptr now pointing to the subject
5920     position that corresponds to this mark. Otherwise, pass back the return
5921     code unaltered. */
5922 
5923     if (rrc == MATCH_SKIP_ARG &&
5924              PRIV(strcmp)(Fecode + 2, mb->verb_skip_ptr) == 0)
5925       {
5926       mb->verb_skip_ptr = Feptr;   /* Pass back current position */
5927       RRETURN(MATCH_SKIP);
5928       }
5929     RRETURN(rrc);
5930 
5931     case OP_FAIL:
5932     RRETURN(MATCH_NOMATCH);
5933 
5934     /* Record the current recursing group number in mb->verb_current_recurse
5935     when a backtracking return such as MATCH_COMMIT is given. This enables the
5936     recurse processing to catch verbs from within the recursion. */
5937 
5938     case OP_COMMIT:
5939     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM13);
5940     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5941     mb->verb_current_recurse = Fcurrent_recurse;
5942     RRETURN(MATCH_COMMIT);
5943 
5944     case OP_COMMIT_ARG:
5945     Fmark = mb->nomatch_mark = Fecode + 2;
5946     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM36);
5947     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5948     mb->verb_current_recurse = Fcurrent_recurse;
5949     RRETURN(MATCH_COMMIT);
5950 
5951     case OP_PRUNE:
5952     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM14);
5953     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5954     mb->verb_current_recurse = Fcurrent_recurse;
5955     RRETURN(MATCH_PRUNE);
5956 
5957     case OP_PRUNE_ARG:
5958     Fmark = mb->nomatch_mark = Fecode + 2;
5959     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM15);
5960     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5961     mb->verb_current_recurse = Fcurrent_recurse;
5962     RRETURN(MATCH_PRUNE);
5963 
5964     case OP_SKIP:
5965     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM16);
5966     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5967     mb->verb_skip_ptr = Feptr;   /* Pass back current position */
5968     mb->verb_current_recurse = Fcurrent_recurse;
5969     RRETURN(MATCH_SKIP);
5970 
5971     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
5972     nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
5973     not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
5974     that failed and any that precede it (either they also failed, or were not
5975     triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
5976     SKIP_ARG gets to top level, the match is re-run with mb->ignore_skip_arg
5977     set to the count of the one that failed. */
5978 
5979     case OP_SKIP_ARG:
5980     mb->skip_arg_count++;
5981     if (mb->skip_arg_count <= mb->ignore_skip_arg)
5982       {
5983       Fecode += PRIV(OP_lengths)[*Fecode] + Fecode[1];
5984       break;
5985       }
5986     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM17);
5987     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5988 
5989     /* Pass back the current skip name and return the special MATCH_SKIP_ARG
5990     return code. This will either be caught by a matching MARK, or get to the
5991     top, where it causes a rematch with mb->ignore_skip_arg set to the value of
5992     mb->skip_arg_count. */
5993 
5994     mb->verb_skip_ptr = Fecode + 2;
5995     mb->verb_current_recurse = Fcurrent_recurse;
5996     RRETURN(MATCH_SKIP_ARG);
5997 
5998     /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
5999     the branch in which it occurs can be determined. */
6000 
6001     case OP_THEN:
6002     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM18);
6003     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6004     mb->verb_ecode_ptr = Fecode;
6005     mb->verb_current_recurse = Fcurrent_recurse;
6006     RRETURN(MATCH_THEN);
6007 
6008     case OP_THEN_ARG:
6009     Fmark = mb->nomatch_mark = Fecode + 2;
6010     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM19);
6011     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6012     mb->verb_ecode_ptr = Fecode;
6013     mb->verb_current_recurse = Fcurrent_recurse;
6014     RRETURN(MATCH_THEN);
6015 
6016 
6017     /* ===================================================================== */
6018     /* There's been some horrible disaster. Arrival here can only mean there is
6019     something seriously wrong in the code above or the OP_xxx definitions. */
6020 
6021     default:
6022     return PCRE2_ERROR_INTERNAL;
6023     }
6024 
6025   /* Do not insert any code in here without much thought; it is assumed
6026   that "continue" in the code above comes out to here to repeat the main
6027   loop. */
6028 
6029   }  /* End of main loop */
6030 /* Control never reaches here */
6031 
6032 
6033 /* ========================================================================= */
6034 /* The RRETURN() macro jumps here. The number that is saved in Freturn_id
6035 indicates which label we actually want to return to. The value in Frdepth is
6036 the index number of the frame in the vector. The return value has been placed
6037 in rrc. */
6038 
6039 #define LBL(val) case val: goto L_RM##val;
6040 
6041 RETURN_SWITCH:
6042 if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
6043 if (Frdepth == 0) return rrc;                     /* Exit from the top level */
6044 F = (heapframe *)((char *)F - Fback_frame);       /* Backtrack */
6045 mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */
6046 
6047 #ifdef DEBUG_SHOW_RMATCH
6048 fprintf(stderr, "++ RETURN %d to %d\n", rrc, Freturn_id);
6049 #endif
6050 
6051 switch (Freturn_id)
6052   {
6053   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6054   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
6055   LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
6056   LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
6057   LBL(33) LBL(34) LBL(35) LBL(36)
6058 
6059 #ifdef SUPPORT_WIDE_CHARS
6060   LBL(100) LBL(101)
6061 #endif
6062 
6063 #ifdef SUPPORT_UNICODE
6064   LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206)
6065   LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213)
6066   LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220)
6067   LBL(221) LBL(222)
6068 #endif
6069 
6070   default:
6071   return PCRE2_ERROR_INTERNAL;
6072   }
6073 #undef LBL
6074 }
6075 
6076 
6077 /*************************************************
6078 *           Match a Regular Expression           *
6079 *************************************************/
6080 
6081 /* This function applies a compiled pattern to a subject string and picks out
6082 portions of the string if it matches. Two elements in the vector are set for
6083 each substring: the offsets to the start and end of the substring.
6084 
6085 Arguments:
6086   code            points to the compiled expression
6087   subject         points to the subject string
6088   length          length of subject string (may contain binary zeros)
6089   start_offset    where to start in the subject string
6090   options         option bits
6091   match_data      points to a match_data block
6092   mcontext        points to a PCRE2 context
6093 
6094 Returns:          > 0 => success; value is the number of ovector pairs filled
6095                   = 0 => success, but ovector is not big enough
6096                   = -1 => failed to match (PCRE2_ERROR_NOMATCH)
6097                   = -2 => partial match (PCRE2_ERROR_PARTIAL)
6098                   < -2 => some kind of unexpected problem
6099 */
6100 
6101 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_match(const pcre2_code * code,PCRE2_SPTR subject,PCRE2_SIZE length,PCRE2_SIZE start_offset,uint32_t options,pcre2_match_data * match_data,pcre2_match_context * mcontext)6102 pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
6103   PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
6104   pcre2_match_context *mcontext)
6105 {
6106 int rc;
6107 int was_zero_terminated = 0;
6108 const uint8_t *start_bits = NULL;
6109 const pcre2_real_code *re = (const pcre2_real_code *)code;
6110 
6111 BOOL anchored;
6112 BOOL firstline;
6113 BOOL has_first_cu = FALSE;
6114 BOOL has_req_cu = FALSE;
6115 BOOL startline;
6116 
6117 #if PCRE2_CODE_UNIT_WIDTH == 8
6118 BOOL memchr_not_found_first_cu;
6119 BOOL memchr_not_found_first_cu2;
6120 #endif
6121 
6122 PCRE2_UCHAR first_cu = 0;
6123 PCRE2_UCHAR first_cu2 = 0;
6124 PCRE2_UCHAR req_cu = 0;
6125 PCRE2_UCHAR req_cu2 = 0;
6126 
6127 PCRE2_SPTR bumpalong_limit;
6128 PCRE2_SPTR end_subject;
6129 PCRE2_SPTR true_end_subject;
6130 PCRE2_SPTR start_match = subject + start_offset;
6131 PCRE2_SPTR req_cu_ptr = start_match - 1;
6132 PCRE2_SPTR start_partial;
6133 PCRE2_SPTR match_partial;
6134 
6135 #ifdef SUPPORT_JIT
6136 BOOL use_jit;
6137 #endif
6138 
6139 /* This flag is needed even when Unicode is not supported for convenience
6140 (it is used by the IS_NEWLINE macro). */
6141 
6142 BOOL utf = FALSE;
6143 
6144 #ifdef SUPPORT_UNICODE
6145 BOOL ucp = FALSE;
6146 BOOL allow_invalid;
6147 uint32_t fragment_options = 0;
6148 #ifdef SUPPORT_JIT
6149 BOOL jit_checked_utf = FALSE;
6150 #endif
6151 #endif  /* SUPPORT_UNICODE */
6152 
6153 PCRE2_SIZE frame_size;
6154 
6155 /* We need to have mb as a pointer to a match block, because the IS_NEWLINE
6156 macro is used below, and it expects NLBLOCK to be defined as a pointer. */
6157 
6158 pcre2_callout_block cb;
6159 match_block actual_match_block;
6160 match_block *mb = &actual_match_block;
6161 
6162 /* Allocate an initial vector of backtracking frames on the stack. If this
6163 proves to be too small, it is replaced by a larger one on the heap. To get a
6164 vector of the size required that is aligned for pointers, allocate it as a
6165 vector of pointers. */
6166 
6167 PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)]
6168     PCRE2_KEEP_UNINITIALIZED;
6169 mb->stack_frames = (heapframe *)stack_frames_vector;
6170 
6171 /* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
6172 subject string. */
6173 
6174 if (length == PCRE2_ZERO_TERMINATED)
6175   {
6176   length = PRIV(strlen)(subject);
6177   was_zero_terminated = 1;
6178   }
6179 true_end_subject = end_subject = subject + length;
6180 
6181 /* Plausibility checks */
6182 
6183 if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
6184 if (code == NULL || subject == NULL || match_data == NULL)
6185   return PCRE2_ERROR_NULL;
6186 if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
6187 
6188 /* Check that the first field in the block is the magic number. */
6189 
6190 if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
6191 
6192 /* Check the code unit width. */
6193 
6194 if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
6195   return PCRE2_ERROR_BADMODE;
6196 
6197 /* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
6198 options variable for this function. Users of PCRE2 who are not calling the
6199 function directly would like to have a way of setting these flags, in the same
6200 way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
6201 constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
6202 (*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field, which we now
6203 transfer to the options for this function. The bits are guaranteed to be
6204 adjacent, but do not have the same values. This bit of Boolean trickery assumes
6205 that the match-time bits are not more significant than the flag bits. If by
6206 accident this is not the case, a compile-time division by zero error will
6207 occur. */
6208 
6209 #define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
6210 #define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
6211 options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
6212 #undef FF
6213 #undef OO
6214 
6215 /* If the pattern was successfully studied with JIT support, we will run the
6216 JIT executable instead of the rest of this function. Most options must be set
6217 at compile time for the JIT code to be usable. */
6218 
6219 #ifdef SUPPORT_JIT
6220 use_jit = (re->executable_jit != NULL &&
6221           (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0);
6222 #endif
6223 
6224 /* Initialize UTF/UCP parameters. */
6225 
6226 #ifdef SUPPORT_UNICODE
6227 utf = (re->overall_options & PCRE2_UTF) != 0;
6228 allow_invalid = (re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0;
6229 ucp = (re->overall_options & PCRE2_UCP) != 0;
6230 #endif  /* SUPPORT_UNICODE */
6231 
6232 /* Convert the partial matching flags into an integer. */
6233 
6234 mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
6235               ((options & PCRE2_PARTIAL_SOFT) != 0)? 1 : 0;
6236 
6237 /* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same
6238 time. */
6239 
6240 if (mb->partial != 0 &&
6241    ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
6242   return PCRE2_ERROR_BADOPTION;
6243 
6244 /* It is an error to set an offset limit without setting the flag at compile
6245 time. */
6246 
6247 if (mcontext != NULL && mcontext->offset_limit != PCRE2_UNSET &&
6248      (re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
6249   return PCRE2_ERROR_BADOFFSETLIMIT;
6250 
6251 /* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,
6252 free the memory that was obtained. Set the field to NULL for no match cases. */
6253 
6254 if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
6255   {
6256   match_data->memctl.free((void *)match_data->subject,
6257     match_data->memctl.memory_data);
6258   match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
6259   }
6260 match_data->subject = NULL;
6261 
6262 /* Zero the error offset in case the first code unit is invalid UTF. */
6263 
6264 match_data->startchar = 0;
6265 
6266 
6267 /* ============================= JIT matching ============================== */
6268 
6269 /* Prepare for JIT matching. Check a UTF string for validity unless no check is
6270 requested or invalid UTF can be handled. We check only the portion of the
6271 subject that might be inspected during matching - from the offset minus the
6272 maximum lookbehind to the given length. This saves time when a small part of a
6273 large subject is being matched by the use of a starting offset. Note that the
6274 maximum lookbehind is a number of characters, not code units. */
6275 
6276 #ifdef SUPPORT_JIT
6277 if (use_jit)
6278   {
6279 #ifdef SUPPORT_UNICODE
6280   if (utf && (options & PCRE2_NO_UTF_CHECK) == 0 && !allow_invalid)
6281     {
6282 #if PCRE2_CODE_UNIT_WIDTH != 32
6283     unsigned int i;
6284 #endif
6285 
6286     /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
6287     character start. */
6288 
6289 #if PCRE2_CODE_UNIT_WIDTH != 32
6290     if (start_match < end_subject && NOT_FIRSTCU(*start_match))
6291       {
6292       if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
6293 #if PCRE2_CODE_UNIT_WIDTH == 8
6294       return PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
6295 #else
6296       return PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
6297 #endif
6298       }
6299 #endif  /* WIDTH != 32 */
6300 
6301     /* Move back by the maximum lookbehind, just in case it happens at the very
6302     start of matching. */
6303 
6304 #if PCRE2_CODE_UNIT_WIDTH != 32
6305     for (i = re->max_lookbehind; i > 0 && start_match > subject; i--)
6306       {
6307       start_match--;
6308       while (start_match > subject &&
6309 #if PCRE2_CODE_UNIT_WIDTH == 8
6310       (*start_match & 0xc0) == 0x80)
6311 #else  /* 16-bit */
6312       (*start_match & 0xfc00) == 0xdc00)
6313 #endif
6314         start_match--;
6315       }
6316 #else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6317 
6318     /* In the 32-bit library, one code unit equals one character. However,
6319     we cannot just subtract the lookbehind and then compare pointers, because
6320     a very large lookbehind could create an invalid pointer. */
6321 
6322     if (start_offset >= re->max_lookbehind)
6323       start_match -= re->max_lookbehind;
6324     else
6325       start_match = subject;
6326 #endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6327 
6328     /* Validate the relevant portion of the subject. Adjust the offset of an
6329     invalid code point to be an absolute offset in the whole string. */
6330 
6331     match_data->rc = PRIV(valid_utf)(start_match,
6332       length - (start_match - subject), &(match_data->startchar));
6333     if (match_data->rc != 0)
6334       {
6335       match_data->startchar += start_match - subject;
6336       return match_data->rc;
6337       }
6338     jit_checked_utf = TRUE;
6339     }
6340 #endif  /* SUPPORT_UNICODE */
6341 
6342   /* If JIT returns BADOPTION, which means that the selected complete or
6343   partial matching mode was not compiled, fall through to the interpreter. */
6344 
6345   rc = pcre2_jit_match(code, subject, length, start_offset, options,
6346     match_data, mcontext);
6347   if (rc != PCRE2_ERROR_JIT_BADOPTION)
6348     {
6349     if (rc >= 0 && (options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
6350       {
6351       length = CU2BYTES(length + was_zero_terminated);
6352       match_data->subject = match_data->memctl.malloc(length,
6353         match_data->memctl.memory_data);
6354       if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
6355       memcpy((void *)match_data->subject, subject, length);
6356       match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
6357       }
6358     return rc;
6359     }
6360   }
6361 #endif  /* SUPPORT_JIT */
6362 
6363 /* ========================= End of JIT matching ========================== */
6364 
6365 
6366 /* Proceed with non-JIT matching. The default is to allow lookbehinds to the
6367 start of the subject. A UTF check when there is a non-zero offset may change
6368 this. */
6369 
6370 mb->check_subject = subject;
6371 
6372 /* If a UTF subject string was not checked for validity in the JIT code above,
6373 check it here, and handle support for invalid UTF strings. The check above
6374 happens only when invalid UTF is not supported and PCRE2_NO_UTF_CHECK is unset.
6375 If we get here in those circumstances, it means the subject string is valid,
6376 but for some reason JIT matching was not successful. There is no need to check
6377 the subject again.
6378 
6379 We check only the portion of the subject that might be inspected during
6380 matching - from the offset minus the maximum lookbehind to the given length.
6381 This saves time when a small part of a large subject is being matched by the
6382 use of a starting offset. Note that the maximum lookbehind is a number of
6383 characters, not code units.
6384 
6385 Note also that support for invalid UTF forces a check, overriding the setting
6386 of PCRE2_NO_UTF_CHECK. */
6387 
6388 #ifdef SUPPORT_UNICODE
6389 if (utf &&
6390 #ifdef SUPPORT_JIT
6391     !jit_checked_utf &&
6392 #endif
6393     ((options & PCRE2_NO_UTF_CHECK) == 0 || allow_invalid))
6394   {
6395 #if PCRE2_CODE_UNIT_WIDTH != 32
6396   BOOL skipped_bad_start = FALSE;
6397 #endif
6398 
6399   /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
6400   character start. If we are handling invalid UTF, just skip over such code
6401   units. Otherwise, give an appropriate error. */
6402 
6403 #if PCRE2_CODE_UNIT_WIDTH != 32
6404   if (allow_invalid)
6405     {
6406     while (start_match < end_subject && NOT_FIRSTCU(*start_match))
6407       {
6408       start_match++;
6409       skipped_bad_start = TRUE;
6410       }
6411     }
6412   else if (start_match < end_subject && NOT_FIRSTCU(*start_match))
6413     {
6414     if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
6415 #if PCRE2_CODE_UNIT_WIDTH == 8
6416     return PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
6417 #else
6418     return PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
6419 #endif
6420     }
6421 #endif  /* WIDTH != 32 */
6422 
6423   /* The mb->check_subject field points to the start of UTF checking;
6424   lookbehinds can go back no further than this. */
6425 
6426   mb->check_subject = start_match;
6427 
6428   /* Move back by the maximum lookbehind, just in case it happens at the very
6429   start of matching, but don't do this if we skipped bad 8-bit or 16-bit code
6430   units above. */
6431 
6432 #if PCRE2_CODE_UNIT_WIDTH != 32
6433   if (!skipped_bad_start)
6434     {
6435     unsigned int i;
6436     for (i = re->max_lookbehind; i > 0 && mb->check_subject > subject; i--)
6437       {
6438       mb->check_subject--;
6439       while (mb->check_subject > subject &&
6440 #if PCRE2_CODE_UNIT_WIDTH == 8
6441       (*mb->check_subject & 0xc0) == 0x80)
6442 #else  /* 16-bit */
6443       (*mb->check_subject & 0xfc00) == 0xdc00)
6444 #endif
6445         mb->check_subject--;
6446       }
6447     }
6448 #else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6449 
6450   /* In the 32-bit library, one code unit equals one character. However,
6451   we cannot just subtract the lookbehind and then compare pointers, because
6452   a very large lookbehind could create an invalid pointer. */
6453 
6454   if (start_offset >= re->max_lookbehind)
6455     mb->check_subject -= re->max_lookbehind;
6456   else
6457     mb->check_subject = subject;
6458 #endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6459 
6460   /* Validate the relevant portion of the subject. There's a loop in case we
6461   encounter bad UTF in the characters preceding start_match which we are
6462   scanning because of a lookbehind. */
6463 
6464   for (;;)
6465     {
6466     match_data->rc = PRIV(valid_utf)(mb->check_subject,
6467       length - (mb->check_subject - subject), &(match_data->startchar));
6468 
6469     if (match_data->rc == 0) break;   /* Valid UTF string */
6470 
6471     /* Invalid UTF string. Adjust the offset to be an absolute offset in the
6472     whole string. If we are handling invalid UTF strings, set end_subject to
6473     stop before the bad code unit, and set the options to "not end of line".
6474     Otherwise return the error. */
6475 
6476     match_data->startchar += mb->check_subject - subject;
6477     if (!allow_invalid || match_data->rc > 0) return match_data->rc;
6478     end_subject = subject + match_data->startchar;
6479 
6480     /* If the end precedes start_match, it means there is invalid UTF in the
6481     extra code units we reversed over because of a lookbehind. Advance past the
6482     first bad code unit, and then skip invalid character starting code units in
6483     8-bit and 16-bit modes, and try again. */
6484 
6485     if (end_subject < start_match)
6486       {
6487       mb->check_subject = end_subject + 1;
6488 #if PCRE2_CODE_UNIT_WIDTH != 32
6489       while (mb->check_subject < start_match && NOT_FIRSTCU(*mb->check_subject))
6490         mb->check_subject++;
6491 #endif
6492       }
6493 
6494     /* Otherwise, set the not end of line option, and do the match. */
6495 
6496     else
6497       {
6498       fragment_options = PCRE2_NOTEOL;
6499       break;
6500       }
6501     }
6502   }
6503 #endif  /* SUPPORT_UNICODE */
6504 
6505 /* A NULL match context means "use a default context", but we take the memory
6506 control functions from the pattern. */
6507 
6508 if (mcontext == NULL)
6509   {
6510   mcontext = (pcre2_match_context *)(&PRIV(default_match_context));
6511   mb->memctl = re->memctl;
6512   }
6513 else mb->memctl = mcontext->memctl;
6514 
6515 anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0;
6516 firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
6517 startline = (re->flags & PCRE2_STARTLINE) != 0;
6518 bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)?
6519   true_end_subject : subject + mcontext->offset_limit;
6520 
6521 /* Initialize and set up the fixed fields in the callout block, with a pointer
6522 in the match block. */
6523 
6524 mb->cb = &cb;
6525 cb.version = 2;
6526 cb.subject = subject;
6527 cb.subject_length = (PCRE2_SIZE)(end_subject - subject);
6528 cb.callout_flags = 0;
6529 
6530 /* Fill in the remaining fields in the match block, except for moptions, which
6531 gets set later. */
6532 
6533 mb->callout = mcontext->callout;
6534 mb->callout_data = mcontext->callout_data;
6535 
6536 mb->start_subject = subject;
6537 mb->start_offset = start_offset;
6538 mb->end_subject = end_subject;
6539 mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
6540 mb->allowemptypartial = (re->max_lookbehind > 0) ||
6541     (re->flags & PCRE2_MATCH_EMPTY) != 0;
6542 mb->poptions = re->overall_options;          /* Pattern options */
6543 mb->ignore_skip_arg = 0;
6544 mb->mark = mb->nomatch_mark = NULL;          /* In case never set */
6545 
6546 /* The name table is needed for finding all the numbers associated with a
6547 given name, for condition testing. The code follows the name table. */
6548 
6549 mb->name_table = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code));
6550 mb->name_count = re->name_count;
6551 mb->name_entry_size = re->name_entry_size;
6552 mb->start_code = mb->name_table + re->name_count * re->name_entry_size;
6553 
6554 /* Process the \R and newline settings. */
6555 
6556 mb->bsr_convention = re->bsr_convention;
6557 mb->nltype = NLTYPE_FIXED;
6558 switch(re->newline_convention)
6559   {
6560   case PCRE2_NEWLINE_CR:
6561   mb->nllen = 1;
6562   mb->nl[0] = CHAR_CR;
6563   break;
6564 
6565   case PCRE2_NEWLINE_LF:
6566   mb->nllen = 1;
6567   mb->nl[0] = CHAR_NL;
6568   break;
6569 
6570   case PCRE2_NEWLINE_NUL:
6571   mb->nllen = 1;
6572   mb->nl[0] = CHAR_NUL;
6573   break;
6574 
6575   case PCRE2_NEWLINE_CRLF:
6576   mb->nllen = 2;
6577   mb->nl[0] = CHAR_CR;
6578   mb->nl[1] = CHAR_NL;
6579   break;
6580 
6581   case PCRE2_NEWLINE_ANY:
6582   mb->nltype = NLTYPE_ANY;
6583   break;
6584 
6585   case PCRE2_NEWLINE_ANYCRLF:
6586   mb->nltype = NLTYPE_ANYCRLF;
6587   break;
6588 
6589   default: return PCRE2_ERROR_INTERNAL;
6590   }
6591 
6592 /* The backtracking frames have fixed data at the front, and a PCRE2_SIZE
6593 vector at the end, whose size depends on the number of capturing parentheses in
6594 the pattern. It is not used at all if there are no capturing parentheses.
6595 
6596   frame_size             is the total size of each frame
6597   mb->frame_vector_size  is the total usable size of the vector (rounded down
6598                            to a whole number of frames)
6599 
6600 The last of these is changed within the match() function if the frame vector
6601 has to be expanded. We therefore put it into the match block so that it is
6602 correct when calling match() more than once for non-anchored patterns. */
6603 
6604 frame_size = offsetof(heapframe, ovector) +
6605   re->top_bracket * 2 * sizeof(PCRE2_SIZE);
6606 
6607 /* Limits set in the pattern override the match context only if they are
6608 smaller. */
6609 
6610 mb->heap_limit = (mcontext->heap_limit < re->limit_heap)?
6611   mcontext->heap_limit : re->limit_heap;
6612 
6613 mb->match_limit = (mcontext->match_limit < re->limit_match)?
6614   mcontext->match_limit : re->limit_match;
6615 
6616 mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
6617   mcontext->depth_limit : re->limit_depth;
6618 
6619 /* If a pattern has very many capturing parentheses, the frame size may be very
6620 large. Ensure that there are at least 10 available frames by getting an initial
6621 vector on the heap if necessary, except when the heap limit prevents this. Get
6622 fewer if possible. (The heap limit is in kibibytes.) */
6623 
6624 if (frame_size <= START_FRAMES_SIZE/10)
6625   {
6626   mb->match_frames = mb->stack_frames;   /* Initial frame vector on the stack */
6627   mb->frame_vector_size = ((START_FRAMES_SIZE/frame_size) * frame_size);
6628   }
6629 else
6630   {
6631   mb->frame_vector_size = frame_size * 10;
6632   if ((mb->frame_vector_size / 1024) > mb->heap_limit)
6633     {
6634     if (frame_size > mb->heap_limit * 1024) return PCRE2_ERROR_HEAPLIMIT;
6635     mb->frame_vector_size = ((mb->heap_limit * 1024)/frame_size) * frame_size;
6636     }
6637   mb->match_frames = mb->memctl.malloc(mb->frame_vector_size,
6638     mb->memctl.memory_data);
6639   if (mb->match_frames == NULL) return PCRE2_ERROR_NOMEMORY;
6640   }
6641 
6642 mb->match_frames_top =
6643   (heapframe *)((char *)mb->match_frames + mb->frame_vector_size);
6644 
6645 /* Write to the ovector within the first frame to mark every capture unset and
6646 to avoid uninitialized memory read errors when it is copied to a new frame. */
6647 
6648 memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff,
6649   re->top_bracket * 2 * sizeof(PCRE2_SIZE));
6650 
6651 /* Pointers to the individual character tables */
6652 
6653 mb->lcc = re->tables + lcc_offset;
6654 mb->fcc = re->tables + fcc_offset;
6655 mb->ctypes = re->tables + ctypes_offset;
6656 
6657 /* Set up the first code unit to match, if available. If there's no first code
6658 unit there may be a bitmap of possible first characters. */
6659 
6660 if ((re->flags & PCRE2_FIRSTSET) != 0)
6661   {
6662   has_first_cu = TRUE;
6663   first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
6664   if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
6665     {
6666     first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
6667 #ifdef SUPPORT_UNICODE
6668 #if PCRE2_CODE_UNIT_WIDTH == 8
6669     if (first_cu > 127 && ucp && !utf) first_cu2 = UCD_OTHERCASE(first_cu);
6670 #else
6671     if (first_cu > 127 && (utf || ucp)) first_cu2 = UCD_OTHERCASE(first_cu);
6672 #endif
6673 #endif  /* SUPPORT_UNICODE */
6674     }
6675   }
6676 else
6677   if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
6678     start_bits = re->start_bitmap;
6679 
6680 /* There may also be a "last known required character" set. */
6681 
6682 if ((re->flags & PCRE2_LASTSET) != 0)
6683   {
6684   has_req_cu = TRUE;
6685   req_cu = req_cu2 = (PCRE2_UCHAR)(re->last_codeunit);
6686   if ((re->flags & PCRE2_LASTCASELESS) != 0)
6687     {
6688     req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu);
6689 #ifdef SUPPORT_UNICODE
6690 #if PCRE2_CODE_UNIT_WIDTH == 8
6691     if (req_cu > 127 && ucp && !utf) req_cu2 = UCD_OTHERCASE(req_cu);
6692 #else
6693     if (req_cu > 127 && (utf || ucp)) req_cu2 = UCD_OTHERCASE(req_cu);
6694 #endif
6695 #endif  /* SUPPORT_UNICODE */
6696     }
6697   }
6698 
6699 
6700 /* ==========================================================================*/
6701 
6702 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
6703 the loop runs just once. */
6704 
6705 #ifdef SUPPORT_UNICODE
6706 FRAGMENT_RESTART:
6707 #endif
6708 
6709 start_partial = match_partial = NULL;
6710 mb->hitend = FALSE;
6711 
6712 #if PCRE2_CODE_UNIT_WIDTH == 8
6713 memchr_not_found_first_cu = FALSE;
6714 memchr_not_found_first_cu2 = FALSE;
6715 #endif
6716 
6717 for(;;)
6718   {
6719   PCRE2_SPTR new_start_match;
6720 
6721   /* ----------------- Start of match optimizations ---------------- */
6722 
6723   /* There are some optimizations that avoid running the match if a known
6724   starting point is not found, or if a known later code unit is not present.
6725   However, there is an option (settable at compile time) that disables these,
6726   for testing and for ensuring that all callouts do actually occur. */
6727 
6728   if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
6729     {
6730     /* If firstline is TRUE, the start of the match is constrained to the first
6731     line of a multiline string. That is, the match must be before or at the
6732     first newline following the start of matching. Temporarily adjust
6733     end_subject so that we stop the scans for a first code unit at a newline.
6734     If the match fails at the newline, later code breaks the loop. */
6735 
6736     if (firstline)
6737       {
6738       PCRE2_SPTR t = start_match;
6739 #ifdef SUPPORT_UNICODE
6740       if (utf)
6741         {
6742         while (t < end_subject && !IS_NEWLINE(t))
6743           {
6744           t++;
6745           ACROSSCHAR(t < end_subject, t, t++);
6746           }
6747         }
6748       else
6749 #endif
6750       while (t < end_subject && !IS_NEWLINE(t)) t++;
6751       end_subject = t;
6752       }
6753 
6754     /* Anchored: check the first code unit if one is recorded. This may seem
6755     pointless but it can help in detecting a no match case without scanning for
6756     the required code unit. */
6757 
6758     if (anchored)
6759       {
6760       if (has_first_cu || start_bits != NULL)
6761         {
6762         BOOL ok = start_match < end_subject;
6763         if (ok)
6764           {
6765           PCRE2_UCHAR c = UCHAR21TEST(start_match);
6766           ok = has_first_cu && (c == first_cu || c == first_cu2);
6767           if (!ok && start_bits != NULL)
6768             {
6769 #if PCRE2_CODE_UNIT_WIDTH != 8
6770             if (c > 255) c = 255;
6771 #endif
6772             ok = (start_bits[c/8] & (1u << (c&7))) != 0;
6773             }
6774           }
6775         if (!ok)
6776           {
6777           rc = MATCH_NOMATCH;
6778           break;
6779           }
6780         }
6781       }
6782 
6783     /* Not anchored. Advance to a unique first code unit if there is one. In
6784     8-bit mode, the use of memchr() gives a big speed up, even though we have
6785     to call it twice in caseless mode, in order to find the earliest occurrence
6786     of the character in either of its cases. If a call to memchr() that
6787     searches the rest of the subject fails to find one case, remember that in
6788     order not to keep on repeating the search. This can make a huge difference
6789     when the strings are very long and only one case is present. */
6790 
6791     else
6792       {
6793       if (has_first_cu)
6794         {
6795         if (first_cu != first_cu2)  /* Caseless */
6796           {
6797 #if PCRE2_CODE_UNIT_WIDTH != 8
6798           PCRE2_UCHAR smc;
6799           while (start_match < end_subject &&
6800                 (smc = UCHAR21TEST(start_match)) != first_cu &&
6801                   smc != first_cu2)
6802             start_match++;
6803 
6804 #else  /* 8-bit code units */
6805           PCRE2_SPTR pp1 = NULL;
6806           PCRE2_SPTR pp2 = NULL;
6807           PCRE2_SIZE cu2size = end_subject - start_match;
6808 
6809           if (!memchr_not_found_first_cu)
6810             {
6811             pp1 = memchr(start_match, first_cu, end_subject - start_match);
6812             if (pp1 == NULL) memchr_not_found_first_cu = TRUE;
6813               else cu2size = pp1 - start_match;
6814             }
6815 
6816           /* If pp1 is not NULL, we have arranged to search only as far as pp1,
6817           to see if the other case is earlier, so we can set "not found" only
6818           when both searches have returned NULL. */
6819 
6820           if (!memchr_not_found_first_cu2)
6821             {
6822             pp2 = memchr(start_match, first_cu2, cu2size);
6823             memchr_not_found_first_cu2 = (pp2 == NULL && pp1 == NULL);
6824             }
6825 
6826           if (pp1 == NULL)
6827             start_match = (pp2 == NULL)? end_subject : pp2;
6828           else
6829             start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
6830 #endif
6831           }
6832 
6833         /* The caseful case */
6834 
6835         else
6836           {
6837 #if PCRE2_CODE_UNIT_WIDTH != 8
6838           while (start_match < end_subject && UCHAR21TEST(start_match) !=
6839                  first_cu)
6840             start_match++;
6841 #else
6842           start_match = memchr(start_match, first_cu, end_subject - start_match);
6843           if (start_match == NULL) start_match = end_subject;
6844 #endif
6845           }
6846 
6847         /* If we can't find the required first code unit, having reached the
6848         true end of the subject, break the bumpalong loop, to force a match
6849         failure, except when doing partial matching, when we let the next cycle
6850         run at the end of the subject. To see why, consider the pattern
6851         /(?<=abc)def/, which partially matches "abc", even though the string
6852         does not contain the starting character "d". If we have not reached the
6853         true end of the subject (PCRE2_FIRSTLINE caused end_subject to be
6854         temporarily modified) we also let the cycle run, because the matching
6855         string is legitimately allowed to start with the first code unit of a
6856         newline. */
6857 
6858         if (mb->partial == 0 && start_match >= mb->end_subject)
6859           {
6860           rc = MATCH_NOMATCH;
6861           break;
6862           }
6863         }
6864 
6865       /* If there's no first code unit, advance to just after a linebreak for a
6866       multiline match if required. */
6867 
6868       else if (startline)
6869         {
6870         if (start_match > mb->start_subject + start_offset)
6871           {
6872 #ifdef SUPPORT_UNICODE
6873           if (utf)
6874             {
6875             while (start_match < end_subject && !WAS_NEWLINE(start_match))
6876               {
6877               start_match++;
6878               ACROSSCHAR(start_match < end_subject, start_match, start_match++);
6879               }
6880             }
6881           else
6882 #endif
6883           while (start_match < end_subject && !WAS_NEWLINE(start_match))
6884             start_match++;
6885 
6886           /* If we have just passed a CR and the newline option is ANY or
6887           ANYCRLF, and we are now at a LF, advance the match position by one
6888           more code unit. */
6889 
6890           if (start_match[-1] == CHAR_CR &&
6891                (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
6892                start_match < end_subject &&
6893                UCHAR21TEST(start_match) == CHAR_NL)
6894             start_match++;
6895           }
6896         }
6897 
6898       /* If there's no first code unit or a requirement for a multiline line
6899       start, advance to a non-unique first code unit if any have been
6900       identified. The bitmap contains only 256 bits. When code units are 16 or
6901       32 bits wide, all code units greater than 254 set the 255 bit. */
6902 
6903       else if (start_bits != NULL)
6904         {
6905         while (start_match < end_subject)
6906           {
6907           uint32_t c = UCHAR21TEST(start_match);
6908 #if PCRE2_CODE_UNIT_WIDTH != 8
6909           if (c > 255) c = 255;
6910 #endif
6911           if ((start_bits[c/8] & (1u << (c&7))) != 0) break;
6912           start_match++;
6913           }
6914 
6915         /* See comment above in first_cu checking about the next few lines. */
6916 
6917         if (mb->partial == 0 && start_match >= mb->end_subject)
6918           {
6919           rc = MATCH_NOMATCH;
6920           break;
6921           }
6922         }
6923       }   /* End first code unit handling */
6924 
6925     /* Restore fudged end_subject */
6926 
6927     end_subject = mb->end_subject;
6928 
6929     /* The following two optimizations must be disabled for partial matching. */
6930 
6931     if (mb->partial == 0)
6932       {
6933       PCRE2_SPTR p;
6934 
6935       /* The minimum matching length is a lower bound; no string of that length
6936       may actually match the pattern. Although the value is, strictly, in
6937       characters, we treat it as code units to avoid spending too much time in
6938       this optimization. */
6939 
6940       if (end_subject - start_match < re->minlength)
6941         {
6942         rc = MATCH_NOMATCH;
6943         break;
6944         }
6945 
6946       /* If req_cu is set, we know that that code unit must appear in the
6947       subject for the (non-partial) match to succeed. If the first code unit is
6948       set, req_cu must be later in the subject; otherwise the test starts at
6949       the match point. This optimization can save a huge amount of backtracking
6950       in patterns with nested unlimited repeats that aren't going to match.
6951       Writing separate code for caseful/caseless versions makes it go faster,
6952       as does using an autoincrement and backing off on a match. As in the case
6953       of the first code unit, using memchr() in the 8-bit library gives a big
6954       speed up. Unlike the first_cu check above, we do not need to call
6955       memchr() twice in the caseless case because we only need to check for the
6956       presence of the character in either case, not find the first occurrence.
6957 
6958       The search can be skipped if the code unit was found later than the
6959       current starting point in a previous iteration of the bumpalong loop.
6960 
6961       HOWEVER: when the subject string is very, very long, searching to its end
6962       can take a long time, and give bad performance on quite ordinary
6963       anchored patterns. This showed up when somebody was matching something
6964       like /^\d+C/ on a 32-megabyte string... so we don't do this when the
6965       string is sufficiently long, but it's worth searching a lot more for
6966       unanchored patterns. */
6967 
6968       p = start_match + (has_first_cu? 1:0);
6969       if (has_req_cu && p > req_cu_ptr)
6970         {
6971         PCRE2_SIZE check_length = end_subject - start_match;
6972 
6973         if (check_length < REQ_CU_MAX ||
6974               (!anchored && check_length < REQ_CU_MAX * 1000))
6975           {
6976           if (req_cu != req_cu2)  /* Caseless */
6977             {
6978 #if PCRE2_CODE_UNIT_WIDTH != 8
6979             while (p < end_subject)
6980               {
6981               uint32_t pp = UCHAR21INCTEST(p);
6982               if (pp == req_cu || pp == req_cu2) { p--; break; }
6983               }
6984 #else  /* 8-bit code units */
6985             PCRE2_SPTR pp = p;
6986             p = memchr(pp, req_cu, end_subject - pp);
6987             if (p == NULL)
6988               {
6989               p = memchr(pp, req_cu2, end_subject - pp);
6990               if (p == NULL) p = end_subject;
6991               }
6992 #endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
6993             }
6994 
6995           /* The caseful case */
6996 
6997           else
6998             {
6999 #if PCRE2_CODE_UNIT_WIDTH != 8
7000             while (p < end_subject)
7001               {
7002               if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
7003               }
7004 
7005 #else  /* 8-bit code units */
7006             p = memchr(p, req_cu, end_subject - p);
7007             if (p == NULL) p = end_subject;
7008 #endif
7009             }
7010 
7011           /* If we can't find the required code unit, break the bumpalong loop,
7012           forcing a match failure. */
7013 
7014           if (p >= end_subject)
7015             {
7016             rc = MATCH_NOMATCH;
7017             break;
7018             }
7019 
7020           /* If we have found the required code unit, save the point where we
7021           found it, so that we don't search again next time round the bumpalong
7022           loop if the start hasn't yet passed this code unit. */
7023 
7024           req_cu_ptr = p;
7025           }
7026         }
7027       }
7028     }
7029 
7030   /* ------------ End of start of match optimizations ------------ */
7031 
7032   /* Give no match if we have passed the bumpalong limit. */
7033 
7034   if (start_match > bumpalong_limit)
7035     {
7036     rc = MATCH_NOMATCH;
7037     break;
7038     }
7039 
7040   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
7041   first starting point for which a partial match was found. */
7042 
7043   cb.start_match = (PCRE2_SIZE)(start_match - subject);
7044   cb.callout_flags |= PCRE2_CALLOUT_STARTMATCH;
7045 
7046   mb->start_used_ptr = start_match;
7047   mb->last_used_ptr = start_match;
7048 #ifdef SUPPORT_UNICODE
7049   mb->moptions = options | fragment_options;
7050 #else
7051   mb->moptions = options;
7052 #endif
7053   mb->match_call_count = 0;
7054   mb->end_offset_top = 0;
7055   mb->skip_arg_count = 0;
7056 
7057   rc = match(start_match, mb->start_code, match_data->ovector,
7058     match_data->oveccount, re->top_bracket, frame_size, mb);
7059 
7060   if (mb->hitend && start_partial == NULL)
7061     {
7062     start_partial = mb->start_used_ptr;
7063     match_partial = start_match;
7064     }
7065 
7066   switch(rc)
7067     {
7068     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
7069     the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
7070     entirely. The only way we can do that is to re-do the match at the same
7071     point, with a flag to force SKIP with an argument to be ignored. Just
7072     treating this case as NOMATCH does not work because it does not check other
7073     alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
7074 
7075     case MATCH_SKIP_ARG:
7076     new_start_match = start_match;
7077     mb->ignore_skip_arg = mb->skip_arg_count;
7078     break;
7079 
7080     /* SKIP passes back the next starting point explicitly, but if it is no
7081     greater than the match we have just done, treat it as NOMATCH. */
7082 
7083     case MATCH_SKIP:
7084     if (mb->verb_skip_ptr > start_match)
7085       {
7086       new_start_match = mb->verb_skip_ptr;
7087       break;
7088       }
7089     /* Fall through */
7090 
7091     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
7092     exactly like PRUNE. Unset ignore SKIP-with-argument. */
7093 
7094     case MATCH_NOMATCH:
7095     case MATCH_PRUNE:
7096     case MATCH_THEN:
7097     mb->ignore_skip_arg = 0;
7098     new_start_match = start_match + 1;
7099 #ifdef SUPPORT_UNICODE
7100     if (utf)
7101       ACROSSCHAR(new_start_match < end_subject, new_start_match,
7102         new_start_match++);
7103 #endif
7104     break;
7105 
7106     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
7107 
7108     case MATCH_COMMIT:
7109     rc = MATCH_NOMATCH;
7110     goto ENDLOOP;
7111 
7112     /* Any other return is either a match, or some kind of error. */
7113 
7114     default:
7115     goto ENDLOOP;
7116     }
7117 
7118   /* Control reaches here for the various types of "no match at this point"
7119   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
7120 
7121   rc = MATCH_NOMATCH;
7122 
7123   /* If PCRE2_FIRSTLINE is set, the match must happen before or at the first
7124   newline in the subject (though it may continue over the newline). Therefore,
7125   if we have just failed to match, starting at a newline, do not continue. */
7126 
7127   if (firstline && IS_NEWLINE(start_match)) break;
7128 
7129   /* Advance to new matching position */
7130 
7131   start_match = new_start_match;
7132 
7133   /* Break the loop if the pattern is anchored or if we have passed the end of
7134   the subject. */
7135 
7136   if (anchored || start_match > end_subject) break;
7137 
7138   /* If we have just passed a CR and we are now at a LF, and the pattern does
7139   not contain any explicit matches for \r or \n, and the newline option is CRLF
7140   or ANY or ANYCRLF, advance the match position by one more code unit. In
7141   normal matching start_match will always be greater than the first position at
7142   this stage, but a failed *SKIP can cause a return at the same point, which is
7143   why the first test exists. */
7144 
7145   if (start_match > subject + start_offset &&
7146       start_match[-1] == CHAR_CR &&
7147       start_match < end_subject &&
7148       *start_match == CHAR_NL &&
7149       (re->flags & PCRE2_HASCRORLF) == 0 &&
7150         (mb->nltype == NLTYPE_ANY ||
7151          mb->nltype == NLTYPE_ANYCRLF ||
7152          mb->nllen == 2))
7153     start_match++;
7154 
7155   mb->mark = NULL;   /* Reset for start of next match attempt */
7156   }                  /* End of for(;;) "bumpalong" loop */
7157 
7158 /* ==========================================================================*/
7159 
7160 /* When we reach here, one of the following stopping conditions is true:
7161 
7162 (1) The match succeeded, either completely, or partially;
7163 
7164 (2) The pattern is anchored or the match was failed after (*COMMIT);
7165 
7166 (3) We are past the end of the subject or the bumpalong limit;
7167 
7168 (4) PCRE2_FIRSTLINE is set and we have failed to match at a newline, because
7169     this option requests that a match occur at or before the first newline in
7170     the subject.
7171 
7172 (5) Some kind of error occurred.
7173 
7174 */
7175 
7176 ENDLOOP:
7177 
7178 /* If end_subject != true_end_subject, it means we are handling invalid UTF,
7179 and have just processed a non-terminal fragment. If this resulted in no match
7180 or a partial match we must carry on to the next fragment (a partial match is
7181 returned to the caller only at the very end of the subject). A loop is used to
7182 avoid trying to match against empty fragments; if the pattern can match an
7183 empty string it would have done so already. */
7184 
7185 #ifdef SUPPORT_UNICODE
7186 if (utf && end_subject != true_end_subject &&
7187     (rc == MATCH_NOMATCH || rc == PCRE2_ERROR_PARTIAL))
7188   {
7189   for (;;)
7190     {
7191     /* Advance past the first bad code unit, and then skip invalid character
7192     starting code units in 8-bit and 16-bit modes. */
7193 
7194     start_match = end_subject + 1;
7195 
7196 #if PCRE2_CODE_UNIT_WIDTH != 32
7197     while (start_match < true_end_subject && NOT_FIRSTCU(*start_match))
7198       start_match++;
7199 #endif
7200 
7201     /* If we have hit the end of the subject, there isn't another non-empty
7202     fragment, so give up. */
7203 
7204     if (start_match >= true_end_subject)
7205       {
7206       rc = MATCH_NOMATCH;  /* In case it was partial */
7207       break;
7208       }
7209 
7210     /* Check the rest of the subject */
7211 
7212     mb->check_subject = start_match;
7213     rc = PRIV(valid_utf)(start_match, length - (start_match - subject),
7214       &(match_data->startchar));
7215 
7216     /* The rest of the subject is valid UTF. */
7217 
7218     if (rc == 0)
7219       {
7220       mb->end_subject = end_subject = true_end_subject;
7221       fragment_options = PCRE2_NOTBOL;
7222       goto FRAGMENT_RESTART;
7223       }
7224 
7225     /* A subsequent UTF error has been found; if the next fragment is
7226     non-empty, set up to process it. Otherwise, let the loop advance. */
7227 
7228     else if (rc < 0)
7229       {
7230       mb->end_subject = end_subject = start_match + match_data->startchar;
7231       if (end_subject > start_match)
7232         {
7233         fragment_options = PCRE2_NOTBOL|PCRE2_NOTEOL;
7234         goto FRAGMENT_RESTART;
7235         }
7236       }
7237     }
7238   }
7239 #endif  /* SUPPORT_UNICODE */
7240 
7241 /* Release an enlarged frame vector that is on the heap. */
7242 
7243 if (mb->match_frames != mb->stack_frames)
7244   mb->memctl.free(mb->match_frames, mb->memctl.memory_data);
7245 
7246 /* Fill in fields that are always returned in the match data. */
7247 
7248 match_data->code = re;
7249 match_data->mark = mb->mark;
7250 match_data->matchedby = PCRE2_MATCHEDBY_INTERPRETER;
7251 
7252 /* Handle a fully successful match. Set the return code to the number of
7253 captured strings, or 0 if there were too many to fit into the ovector, and then
7254 set the remaining returned values before returning. Make a copy of the subject
7255 string if requested. */
7256 
7257 if (rc == MATCH_MATCH)
7258   {
7259   match_data->rc = ((int)mb->end_offset_top >= 2 * match_data->oveccount)?
7260     0 : (int)mb->end_offset_top/2 + 1;
7261   match_data->startchar = start_match - subject;
7262   match_data->leftchar = mb->start_used_ptr - subject;
7263   match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
7264     mb->last_used_ptr : mb->end_match_ptr) - subject;
7265   if ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
7266     {
7267     length = CU2BYTES(length + was_zero_terminated);
7268     match_data->subject = match_data->memctl.malloc(length,
7269       match_data->memctl.memory_data);
7270     if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
7271     memcpy((void *)match_data->subject, subject, length);
7272     match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
7273     }
7274   else match_data->subject = subject;
7275   return match_data->rc;
7276   }
7277 
7278 /* Control gets here if there has been a partial match, an error, or if the
7279 overall match attempt has failed at all permitted starting positions. Any mark
7280 data is in the nomatch_mark field. */
7281 
7282 match_data->mark = mb->nomatch_mark;
7283 
7284 /* For anything other than nomatch or partial match, just return the code. */
7285 
7286 if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL) match_data->rc = rc;
7287 
7288 /* Handle a partial match. If a "soft" partial match was requested, searching
7289 for a complete match will have continued, and the value of rc at this point
7290 will be MATCH_NOMATCH. For a "hard" partial match, it will already be
7291 PCRE2_ERROR_PARTIAL. */
7292 
7293 else if (match_partial != NULL)
7294   {
7295   match_data->subject = subject;
7296   match_data->ovector[0] = match_partial - subject;
7297   match_data->ovector[1] = end_subject - subject;
7298   match_data->startchar = match_partial - subject;
7299   match_data->leftchar = start_partial - subject;
7300   match_data->rightchar = end_subject - subject;
7301   match_data->rc = PCRE2_ERROR_PARTIAL;
7302   }
7303 
7304 /* Else this is the classic nomatch case. */
7305 
7306 else match_data->rc = PCRE2_ERROR_NOMATCH;
7307 
7308 return match_data->rc;
7309 }
7310 
7311 /* End of pcre2_match.c */
7312