• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10          New API code Copyright (c) 2016 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45 
/* These three names are defined ahead of the inclusion of pcre2_internal.h. */

#define NLBLOCK  mb              /* Block that holds the newline information */
#define PSSTART  start_subject   /* Field that holds the processed string start */
#define PSEND    end_subject     /* Field that holds the processed string end */
49 
50 #include "pcre2_internal.h"
51 
52 /* Masks for identifying the public options that are permitted at match time.
53 */
54 
/* Option bits accepted at match time. */

#define PUBLIC_MATCH_OPTIONS \
  ( PCRE2_ANCHORED          \
  | PCRE2_NOTBOL            \
  | PCRE2_NOTEOL            \
  | PCRE2_NOTEMPTY          \
  | PCRE2_NOTEMPTY_ATSTART  \
  | PCRE2_NO_UTF_CHECK      \
  | PCRE2_PARTIAL_HARD      \
  | PCRE2_PARTIAL_SOFT      \
  | PCRE2_NO_JIT            )

/* Option bits making up the JIT match mask. */

#define PUBLIC_JIT_MATCH_OPTIONS \
  ( PCRE2_NO_UTF_CHECK      \
  | PCRE2_NOTBOL            \
  | PCRE2_NOTEOL            \
  | PCRE2_NOTEMPTY          \
  | PCRE2_NOTEMPTY_ATSTART  \
  | PCRE2_PARTIAL_SOFT      \
  | PCRE2_PARTIAL_HARD      )
63 
64 /* The mb->capture_last field uses the lower 16 bits for the last captured
65 substring (which can never be greater than 65535) and a bit in the top half
66 to mean "capture vector overflowed". This odd way of doing things was
67 implemented when it was realized that preserving and restoring the overflow bit
68 whenever the last capture number was saved/restored made for a neater
69 interface, and doing it this way saved on (a) another variable, which would
70 have increased the stack frame size (a big NO-NO in PCRE) and (b) another
71 separate set of save/restore instructions. The following defines are used in
72 implementing this. */
73 
/* Layout of mb->capture_last: the low 16 bits hold the number of the last
captured substring, and one bit in the upper half records that the capture
vector overflowed. */

#define CAPLMASK    0x0000ffff    /* Bits holding the last capture number */
#define OVFLMASK    0xffff0000    /* Bits reserved for the overflow flag */
#define OVFLBIT     0x00010000    /* The overflow bit itself */

/* Flag bits placed in mb->match_function_type to mark two special kinds of
call to match(); doing it this way avoids another stack variable. */

#define MATCH_CONDASSERT     1  /* Call is checking a condition assertion */
#define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group */

/* Ordinary (non-error) results of match(). The external PCRE2_ERROR_xxx
codes used for errors are all negative. */

#define MATCH_MATCH        1
#define MATCH_NOMATCH      0

/* Internal-only results of match(), chosen to be far enough below zero to
stay clear of the external error codes. */

#define MATCH_ACCEPT       (-999)
#define MATCH_KETRPOS      (-998)
#define MATCH_ONCE         (-997)

/* The five backtracking-verb results below must remain consecutive and in
this order, because they are tested for as a range using the MIN/MAX names. */

#define MATCH_COMMIT       (-996)
#define MATCH_PRUNE        (-995)
#define MATCH_SKIP         (-994)
#define MATCH_SKIP_ARG     (-993)
#define MATCH_THEN         (-992)
#define MATCH_BACKTRACK_MAX MATCH_THEN
#define MATCH_BACKTRACK_MIN MATCH_COMMIT

/* Minimum and maximum counts for the common repeats. In rep_max a zero
entry stands for "no upper limit". */

static const char rep_min[] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0 };
static const char rep_max[] = { 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1 };

/* Largest number of ovector elements that is saved on the system stack while
handling OP_RECURSE when HEAP_MATCH_RECURSE is not in force; malloc() is used
for anything bigger. Keep this a multiple of 3, as the ovector length always
is. */

#define OP_RECURSE_STACK_SAVE_MAX 45
118 
119 
120 
121 /*************************************************
122 *          Match a back-reference                *
123 *************************************************/
124 
125 /* This function is called only when it is known that the offset lies within
126 the offsets that have so far been used in the match. Note that in caseless
127 UTF-8 mode, the number of subject bytes matched may be different to the number
128 of reference bytes. (In theory this could also happen in UTF-16 mode, but it
129 seems unlikely.)
130 
131 Arguments:
132   offset      index into the offset vector
133   offset_top  top of the used offset vector
134   eptr        pointer into the subject
135   mb          points to match block
136   caseless    TRUE if caseless
137   lengthptr   pointer for returning the length matched
138 
139 Returns:      = 0 successful match; number of code units matched is set
140               < 0 no match
141               > 0 partial match
142 */
143 
144 static int
match_ref(PCRE2_SIZE offset,PCRE2_SIZE offset_top,register PCRE2_SPTR eptr,match_block * mb,BOOL caseless,PCRE2_SIZE * lengthptr)145 match_ref(PCRE2_SIZE offset, PCRE2_SIZE offset_top, register PCRE2_SPTR eptr,
146   match_block *mb, BOOL caseless, PCRE2_SIZE *lengthptr)
147 {
148 #if defined SUPPORT_UNICODE
149 BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
150 #endif
151 
152 register PCRE2_SPTR p;
153 PCRE2_SIZE length;
154 PCRE2_SPTR eptr_start = eptr;
155 
156 /* Deal with an unset group. The default is no match, but there is an option to
157 match an empty string. */
158 
159 if (offset >= offset_top || mb->ovector[offset] == PCRE2_UNSET)
160   {
161   if ((mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
162     {
163     *lengthptr = 0;
164     return 0;      /* Match */
165     }
166   else return -1;  /* No match */
167   }
168 
169 /* Separate the caseless and UTF cases for speed. */
170 
171 p = mb->start_subject + mb->ovector[offset];
172 length = mb->ovector[offset+1] - mb->ovector[offset];
173 
174 if (caseless)
175   {
176 #if defined SUPPORT_UNICODE
177   if (utf)
178     {
179     /* Match characters up to the end of the reference. NOTE: the number of
180     code units matched may differ, because in UTF-8 there are some characters
181     whose upper and lower case versions code have different numbers of bytes.
182     For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
183     (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
184     sequence of two of the latter. It is important, therefore, to check the
185     length along the reference, not along the subject (earlier code did this
186     wrong). */
187 
188     PCRE2_SPTR endptr = p + length;
189     while (p < endptr)
190       {
191       uint32_t c, d;
192       const ucd_record *ur;
193       if (eptr >= mb->end_subject) return 1;   /* Partial match */
194       GETCHARINC(c, eptr);
195       GETCHARINC(d, p);
196       ur = GET_UCD(d);
197       if (c != d && c != (uint32_t)((int)d + ur->other_case))
198         {
199         const uint32_t *pp = PRIV(ucd_caseless_sets) + ur->caseset;
200         for (;;)
201           {
202           if (c < *pp) return -1;  /* No match */
203           if (c == *pp++) break;
204           }
205         }
206       }
207     }
208   else
209 #endif
210 
211     /* Not in UTF mode */
212 
213     {
214     for (; length > 0; length--)
215       {
216       uint32_t cc, cp;
217       if (eptr >= mb->end_subject) return 1;   /* Partial match */
218       cc = UCHAR21TEST(eptr);
219       cp = UCHAR21TEST(p);
220       if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc))
221         return -1;  /* No match */
222       p++;
223       eptr++;
224       }
225     }
226   }
227 
228 /* In the caseful case, we can just compare the code units, whether or not we
229 are in UTF mode. */
230 
231 else
232   {
233   for (; length > 0; length--)
234     {
235     if (eptr >= mb->end_subject) return 1;   /* Partial match */
236     if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;  /*No match */
237     }
238   }
239 
240 *lengthptr = eptr - eptr_start;
241 return 0;  /* Match */
242 }
243 
244 
245 
246 /***************************************************************************
247 ****************************************************************************
248                    RECURSION IN THE match() FUNCTION
249 
250 The match() function is highly recursive, though not every recursive call
251 increases the recursion depth. Nevertheless, some regular expressions can cause
252 it to recurse to a great depth. I was writing for Unix, so I just let it call
253 itself recursively. This uses the stack for saving everything that has to be
254 saved for a recursive call. On Unix, the stack can be large, and this works
255 fine.
256 
257 It turns out that on some non-Unix-like systems there are problems with
258 programs that use a lot of stack. (This despite the fact that every last chip
259 has oodles of memory these days, and techniques for extending the stack have
260 been known for decades.) So....
261 
262 There is a fudge, triggered by defining HEAP_MATCH_RECURSE, which avoids
263 recursive calls by keeping local variables that need to be preserved in blocks
264 of memory on the heap instead of on the stack. Macros are used to
265 achieve this so that the actual code doesn't look very different to what it
266 always used to.
267 
268 The original heap-recursive code used longjmp(). However, it seems that this
269 can be very slow on some operating systems. Following a suggestion from Stan
270 Switzer, the use of longjmp() has been abolished, at the cost of having to
271 provide a unique number for each call to RMATCH. There is no way of generating
272 a sequence of numbers at compile time in C. I have given them names, to make
273 them stand out more clearly.
274 
275 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
276 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
277 tests. Furthermore, not using longjmp() means that local dynamic variables
278 don't have indeterminate values; this has meant that the frame size can be
279 reduced because the result can be "passed back" by straight setting of the
280 variable instead of being passed in the frame.
281 ****************************************************************************
282 ***************************************************************************/
283 
284 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
285 below must be updated in sync.  */
286 
/* One identifier per RMATCH call site. Each row restates its starting value
explicitly; the values run consecutively from 1 to 68. */

enum {
  RM1  = 1,  RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
  RM11 = 11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
  RM21 = 21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
  RM31 = 31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
  RM41 = 41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
  RM51 = 51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
  RM61 = 61, RM62, RM63, RM64, RM65, RM66, RM67, RM68
};
294 
295 /* These versions of the macros use the stack, as normal. Note that the "rw"
296 argument of RMATCH isn't actually used in this definition. */
297 
298 #ifndef HEAP_MATCH_RECURSE
/* Stack-based forms: RMATCH is a genuine recursive call of match() whose
result lands in rrc, and RRETURN is a plain return. The rw argument is
accepted but not used here. */

#define REGISTER register

#define RMATCH(ra, rb, rc, rd, re, rw) \
  rrc = match(ra, rb, mstart, rc, rd, re, rdepth + 1)

#define RRETURN(ra) return ra
303 #else
304 
305 /* These versions of the macros manage a private stack on the heap. Note that
306 the "rd" argument of RMATCH isn't actually used in this definition. It's the mb
307 argument of match(), which never changes. */
308 
/* Heap-based forms. REGISTER expands to nothing in this configuration. */

#define REGISTER

/* RMATCH: move to the next frame in the chain (allocating and linking a new
one through mb->stack_memctl if none is there yet), record rw in the current
frame as the place to resume, load the new frame with the call's arguments,
and jump to HEAP_RECURSE. Execution continues at the label L_<rw> when the
"call" comes back. An allocation failure is reported via RRETURN with
PCRE2_ERROR_NOMEMORY. The rd argument is not used. */

#define RMATCH(ra,rb,rc,rd,re,rw)\
  {\
  heapframe *rm_child = frame->Xnextframe;\
  if (rm_child == NULL)\
    {\
    rm_child = (heapframe *)(mb->stack_memctl.malloc)\
      (sizeof(heapframe), mb->stack_memctl.memory_data);\
    if (rm_child == NULL) RRETURN(PCRE2_ERROR_NOMEMORY);\
    rm_child->Xnextframe = NULL;\
    frame->Xnextframe = rm_child;\
    }\
  rm_child->Xprevframe = frame;\
  rm_child->Xrdepth = frame->Xrdepth + 1;\
  frame->Xwhere = rw;\
  rm_child->Xeptr = ra;\
  rm_child->Xecode = rb;\
  rm_child->Xmstart = mstart;\
  rm_child->Xoffset_top = rc;\
  rm_child->Xeptrb = re;\
  frame = rm_child;\
  goto HEAP_RECURSE;\
  L_##rw:;\
  }

/* RRETURN: step back to the previous frame. If there is none, this really
is the outermost level, so return from the function; otherwise put the result
in rrc and jump to HEAP_RETURN. */

#define RRETURN(ra)\
  {\
  frame = frame->Xprevframe;\
  if (frame == NULL) return ra;\
  rrc = ra;\
  goto HEAP_RETURN;\
  }
346 
347 
348 /* Structure for remembering the local variables in a private frame. Arrange it
349 so as to minimize the number of holes. */
350 
351 typedef struct heapframe {
352   struct heapframe *Xprevframe;
353   struct heapframe *Xnextframe;
354 
355 #ifdef SUPPORT_UNICODE
356   PCRE2_SPTR Xcharptr;
357 #endif
358   PCRE2_SPTR Xeptr;
359   PCRE2_SPTR Xecode;
360   PCRE2_SPTR Xmstart;
361   PCRE2_SPTR Xcallpat;
362   PCRE2_SPTR Xdata;
363   PCRE2_SPTR Xnext_ecode;
364   PCRE2_SPTR Xpp;
365   PCRE2_SPTR Xprev;
366   PCRE2_SPTR Xsaved_eptr;
367 
368   eptrblock *Xeptrb;
369 
370   PCRE2_SIZE Xlength;
371   PCRE2_SIZE Xoffset;
372   PCRE2_SIZE Xoffset_top;
373   PCRE2_SIZE Xsave_offset1, Xsave_offset2, Xsave_offset3;
374 
375   uint32_t Xfc;
376   uint32_t Xnumber;
377   uint32_t Xrdepth;
378   uint32_t Xop;
379   uint32_t Xsave_capture_last;
380 
381 #ifdef SUPPORT_UNICODE
382   uint32_t Xprop_value;
383   int Xprop_type;
384   int Xprop_fail_result;
385   int Xoclength;
386 #endif
387 
388   int Xcodelink;
389   int Xctype;
390   int Xfi;
391   int Xmax;
392   int Xmin;
393   int Xwhere;    /* Where to jump back to */
394 
395   BOOL Xcondition;
396   BOOL Xcur_is_word;
397   BOOL Xprev_is_word;
398 
399   eptrblock Xnewptrb;
400   recursion_info Xnew_recursive;
401 
402 #ifdef SUPPORT_UNICODE
403   PCRE2_UCHAR Xocchars[6];
404 #endif
405 } heapframe;
406 
407 #endif
408 
409 
410 /***************************************************************************
411 ***************************************************************************/
412 
413 
414 /* When HEAP_MATCH_RECURSE is not defined, the match() function implements
415 backtrack points by calling itself recursively in all but one case. The one
416 special case is when processing OP_RECURSE, which specifies recursion in the
417 pattern. The entire ovector must be saved and restored while processing
418 OP_RECURSE. If the ovector is small enough, instead of calling match()
419 directly, op_recurse_ovecsave() is called. This function uses the system stack
420 to save the ovector while calling match() to process the pattern recursion. */
421 
422 #ifndef HEAP_MATCH_RECURSE
423 
424 /* We need a prototype for match() because it is mutually recursive with
425 op_recurse_ovecsave(). */
426 
427 static int
428 match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
429   PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth);
430 
431 
432 /*************************************************
433 *      Process OP_RECURSE, stacking ovector      *
434 *************************************************/
435 
436 /* When this function is called, mb->recursive has already been updated to
437 point to a new recursion data block, and all its fields other than ovec_save
438 have been set.
439 
440 This function exists so that the local vector variable ovecsave is no longer
441 defined in the match() function, as it was in PCRE1. It is used only when there
442 is recursion in the pattern, so it wastes a lot of stack to have it defined for
443 every call of match(). We now use this function as an indirect way of calling
444 match() only in the case when ovecsave is needed. (David Wheeler used to say
445 "All problems in computer science can be solved by another level of
446 indirection.")
447 
448 HOWEVER: when this file is compiled by gcc in an optimizing mode, because this
449 function is called only once, and only from within match(), gcc will "inline"
450 it - that is, move it inside match() - and this completely negates its reason
451 for existence. Therefore, we mark it as non-inline when gcc is in use.
452 
453 Arguments:
454   eptr        pointer to current character in subject
455   callpat     the recursion point in the pattern
456   mstart      pointer to the current match start position (can be modified
457                 by encountering \K)
458   offset_top  current top pointer (highest ovector offset used + 1)
459   mb          pointer to "static" info block for the match
460   eptrb       pointer to chain of blocks containing eptr at start of
461                 brackets - for testing for empty matches
462   rdepth      the recursion depth
463 
464 Returns:      a match() return code
465 */
466 
467 static int
468 #if defined(__GNUC__) && !defined(__INTEL_COMPILER)
469 __attribute__ ((noinline))
470 #endif
op_recurse_ovecsave(REGISTER PCRE2_SPTR eptr,PCRE2_SPTR callpat,PCRE2_SPTR mstart,PCRE2_SIZE offset_top,match_block * mb,eptrblock * eptrb,uint32_t rdepth)471 op_recurse_ovecsave(REGISTER PCRE2_SPTR eptr, PCRE2_SPTR callpat,
472   PCRE2_SPTR mstart, PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb,
473   uint32_t rdepth)
474 {
475 register int rrc;
476 BOOL cbegroup = *callpat >= OP_SBRA;
477 recursion_info *new_recursive = mb->recursive;
478 PCRE2_SIZE ovecsave[OP_RECURSE_STACK_SAVE_MAX];
479 
480 /* Save the ovector */
481 
482 new_recursive->ovec_save = ovecsave;
483 memcpy(ovecsave, mb->ovector, mb->offset_end * sizeof(PCRE2_SIZE));
484 
485 /* Do the recursion. After processing each alternative, restore the ovector
486 data and the last captured value. */
487 
488 do
489   {
490   if (cbegroup) mb->match_function_type |= MATCH_CBEGROUP;
491   rrc = match(eptr, callpat + PRIV(OP_lengths)[*callpat], mstart, offset_top,
492     mb, eptrb, rdepth + 1);
493   memcpy(mb->ovector, new_recursive->ovec_save,
494       mb->offset_end * sizeof(PCRE2_SIZE));
495   mb->capture_last = new_recursive->saved_capture_last;
496   if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) return rrc;
497 
498   /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
499   recursion; they cause a NOMATCH for the entire recursion. These codes
500   are defined in a range that can be tested for. */
501 
502   if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
503     return MATCH_NOMATCH;
504 
505   /* Any return code other than NOMATCH is an error. Otherwise, advance to the
506   next alternative or to the end of the recursing subpattern. If there were
507   nested recursions, mb->recursive might be changed, so reset it before
508   looping. */
509 
510   if (rrc != MATCH_NOMATCH) return rrc;
511   mb->recursive = new_recursive;
512   callpat += GET(callpat, 1);
513   }
514 while (*callpat == OP_ALT);  /* Loop for the alternatives */
515 
516 /* None of the alternatives matched. */
517 
518 return MATCH_NOMATCH;
519 }
520 #endif  /* HEAP_MATCH_RECURSE */
521 
522 
523 
524 /*************************************************
525 *         Match from current position            *
526 *************************************************/
527 
528 /* This function is called recursively in many circumstances. Whenever it
529 returns a negative (error) response, the outer incarnation must also return the
530 same response. */
531 
532 /* These macros pack up tests that are used for partial matching, and which
533 appear several times in the code. We set the "hit end" flag if the pointer is
534 at the end of the subject and also past the earliest inspected character (i.e.
535 something has been matched, even if not part of the actual matched string). For
536 hard partial matching, we then return immediately. The second one is used when
537 we already know we are past the end of the subject. */
538 
/* CHECK_PARTIAL: when partial matching is enabled and eptr is at or beyond
the end of the subject and also beyond mb->start_used_ptr, set mb->hitend;
if mb->partial is greater than 1, return PCRE2_ERROR_PARTIAL at once. */

#define CHECK_PARTIAL()                                          \
  if (mb->partial != 0 &&                                        \
      eptr >= mb->end_subject &&                                 \
      eptr > mb->start_used_ptr)                                 \
    {                                                            \
    mb->hitend = TRUE;                                           \
    if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);           \
    }

/* SCHECK_PARTIAL: the same, without the end-of-subject comparison, for use
where that is already known to hold. */

#define SCHECK_PARTIAL()                                         \
  if (mb->partial != 0 &&                                        \
      eptr > mb->start_used_ptr)                                 \
    {                                                            \
    mb->hitend = TRUE;                                           \
    if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);           \
    }
553 
554 
555 /* Performance note: It might be tempting to extract commonly used fields from
556 the mb structure (e.g. utf, end_subject) into individual variables to improve
557 performance. Tests using gcc on a SPARC disproved this; in the first case, it
558 made performance worse.
559 
560 Arguments:
561    eptr        pointer to current character in subject
562    ecode       pointer to current position in compiled code
563    mstart      pointer to the current match start position (can be modified
564                  by encountering \K)
565    offset_top  current top pointer (highest ovector offset used + 1)
566    mb          pointer to "static" info block for the match
567    eptrb       pointer to chain of blocks containing eptr at start of
568                  brackets - for testing for empty matches
569    rdepth      the recursion depth
570 
571 Returns:       MATCH_MATCH if matched            )  these values are >= 0
572                MATCH_NOMATCH if failed to match  )
573                a negative MATCH_xxx value for PRUNE, SKIP, etc
574                a negative PCRE2_ERROR_xxx value if aborted by an error condition
575                  (e.g. stopped by repeated call or recursion limit)
576 */
577 
578 static int
match(REGISTER PCRE2_SPTR eptr,REGISTER PCRE2_SPTR ecode,PCRE2_SPTR mstart,PCRE2_SIZE offset_top,match_block * mb,eptrblock * eptrb,uint32_t rdepth)579 match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
580   PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth)
581 {
582 /* These variables do not need to be preserved over recursion in this function,
583 so they can be ordinary variables in all cases. Mark some of them with
584 "register" because they are used a lot in loops. */
585 
586 register int  rrc;         /* Returns from recursive calls */
587 register int  i;           /* Used for loops not involving calls to RMATCH() */
588 register uint32_t c;       /* Character values not kept over RMATCH() calls */
589 register BOOL utf;         /* Local copy of UTF flag for speed */
590 
591 BOOL minimize, possessive; /* Quantifier options */
592 BOOL caseless;
593 int condcode;
594 
595 /* When recursion is not being used, all "local" variables that have to be
596 preserved over calls to RMATCH() are part of a "frame". We set up the top-level
597 frame on the stack here; subsequent instantiations are obtained from the heap
598 whenever RMATCH() does a "recursion". See the macro definitions above. Putting
599 the top-level on the stack rather than malloc-ing them all gives a performance
600 boost in many cases where there is not much "recursion". */
601 
602 #ifdef HEAP_MATCH_RECURSE
603 heapframe *frame = (heapframe *)mb->match_frames_base;
604 
605 /* Copy in the original argument variables */
606 
607 frame->Xeptr = eptr;
608 frame->Xecode = ecode;
609 frame->Xmstart = mstart;
610 frame->Xoffset_top = offset_top;
611 frame->Xeptrb = eptrb;
612 frame->Xrdepth = rdepth;
613 
614 /* This is where control jumps back to to effect "recursion" */
615 
616 HEAP_RECURSE:
617 
618 /* Macros make the argument variables come from the current frame */
619 
620 #define eptr               frame->Xeptr
621 #define ecode              frame->Xecode
622 #define mstart             frame->Xmstart
623 #define offset_top         frame->Xoffset_top
624 #define eptrb              frame->Xeptrb
625 #define rdepth             frame->Xrdepth
626 
627 /* Ditto for the local variables */
628 
629 #ifdef SUPPORT_UNICODE
630 #define charptr            frame->Xcharptr
631 #define prop_value         frame->Xprop_value
632 #define prop_type          frame->Xprop_type
633 #define prop_fail_result   frame->Xprop_fail_result
634 #define oclength           frame->Xoclength
635 #define occhars            frame->Xocchars
636 #endif
637 
638 
639 #define callpat            frame->Xcallpat
640 #define codelink           frame->Xcodelink
641 #define data               frame->Xdata
642 #define next_ecode         frame->Xnext_ecode
643 #define pp                 frame->Xpp
644 #define prev               frame->Xprev
645 #define saved_eptr         frame->Xsaved_eptr
646 
647 #define new_recursive      frame->Xnew_recursive
648 
649 #define ctype              frame->Xctype
650 #define fc                 frame->Xfc
651 #define fi                 frame->Xfi
652 #define length             frame->Xlength
653 #define max                frame->Xmax
654 #define min                frame->Xmin
655 #define number             frame->Xnumber
656 #define offset             frame->Xoffset
657 #define op                 frame->Xop
658 #define save_capture_last  frame->Xsave_capture_last
659 #define save_offset1       frame->Xsave_offset1
660 #define save_offset2       frame->Xsave_offset2
661 #define save_offset3       frame->Xsave_offset3
662 
663 #define condition          frame->Xcondition
664 #define cur_is_word        frame->Xcur_is_word
665 #define prev_is_word       frame->Xprev_is_word
666 
667 #define newptrb            frame->Xnewptrb
668 
669 /* When normal stack-based recursion is being used for match(), local variables
670 are allocated on the stack and get preserved during recursion in the usual way.
671 In this environment, fi and i, and fc and c, can be the same variables. */
672 
673 #else         /* HEAP_MATCH_RECURSE not defined */
674 #define fi i
675 #define fc c
676 
677 /* Many of the following variables are used only in small blocks of the code.
678 My normal style of coding would have declared them within each of those blocks.
679 However, in order to accommodate the version of this code that uses an external
680 "stack" implemented on the heap, it is easier to declare them all here, so the
681 declarations can be cut out in a block. The only declarations within blocks
682 below are for variables that do not have to be preserved over a recursive call
683 to RMATCH(). */
684 
685 #ifdef SUPPORT_UNICODE
686 PCRE2_SPTR charptr;
687 #endif
688 PCRE2_SPTR callpat;
689 PCRE2_SPTR data;
690 PCRE2_SPTR next_ecode;
691 PCRE2_SPTR pp;
692 PCRE2_SPTR prev;
693 PCRE2_SPTR saved_eptr;
694 
695 PCRE2_SIZE length;
696 PCRE2_SIZE offset;
697 PCRE2_SIZE save_offset1, save_offset2, save_offset3;
698 
699 uint32_t number;
700 uint32_t op;
701 uint32_t save_capture_last;
702 
703 #ifdef SUPPORT_UNICODE
704 uint32_t prop_value;
705 int prop_type;
706 int prop_fail_result;
707 int oclength;
708 PCRE2_UCHAR occhars[6];
709 #endif
710 
711 int codelink;
712 int ctype;
713 int max;
714 int min;
715 
716 BOOL condition;
717 BOOL cur_is_word;
718 BOOL prev_is_word;
719 
720 eptrblock newptrb;
721 recursion_info new_recursive;
722 #endif  /* HEAP_MATCH_RECURSE not defined */
723 
724 /* To save space on the stack and in the heap frame, I have doubled up on some
725 of the local variables that are used only in localised parts of the code, but
726 still need to be preserved over recursive calls of match(). These macros define
727 the alternative names that are used. */
728 
729 #define allow_zero      cur_is_word
730 #define cbegroup        condition
731 #define code_offset     codelink
732 #define condassert      condition
733 #define foc             number
734 #define matched_once    prev_is_word
735 #define save_mark       data
736 
737 /* These statements are here to stop the compiler complaining about uninitialized
738 variables. */
739 
740 #ifdef SUPPORT_UNICODE
741 prop_value = 0;
742 prop_fail_result = 0;
743 #endif
744 
745 
746 /* This label is used for tail recursion, which is used in a few cases even
747 when HEAP_MATCH_RECURSE is not defined, in order to reduce the amount of stack
748 that is used. Thanks to Ian Taylor for noticing this possibility and sending
749 the original patch. */
750 
751 TAIL_RECURSE:
752 
753 /* OK, now we can get on with the real code of the function. Recursive calls
754 are specified by the macro RMATCH and RRETURN is used to return. When
755 HEAP_MATCH_RECURSE is *not* defined, these just turn into a recursive call to
756 match() and a "return", respectively. However, RMATCH isn't like a function
757 call because it's quite a complicated macro. It has to be used in one
758 particular way. This shouldn't, however, impact performance when true recursion
759 is being used. */
760 
761 #ifdef SUPPORT_UNICODE
762 utf = (mb->poptions & PCRE2_UTF) != 0;
763 #else
764 utf = FALSE;
765 #endif
766 
767 /* First check that we haven't called match() too many times, or that we
768 haven't exceeded the recursive call limit. */
769 
770 if (mb->match_call_count++ >= mb->match_limit) RRETURN(PCRE2_ERROR_MATCHLIMIT);
771 if (rdepth >= mb->match_limit_recursion) RRETURN(PCRE2_ERROR_RECURSIONLIMIT);
772 
773 /* At the start of a group with an unlimited repeat that may match an empty
774 string, the variable mb->match_function_type contains the MATCH_CBEGROUP bit.
775 It is done this way to save having to use another function argument, which
776 would take up space on the stack. See also MATCH_CONDASSERT below.
777 
778 When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
779 such remembered pointers, to be checked when we hit the closing ket, in order
780 to break infinite loops that match no characters. When match() is called in
781 other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
782 NOT be used with tail recursion, because the memory block that is used is on
783 the stack, so a new one may be required for each match(). */
784 
785 if ((mb->match_function_type & MATCH_CBEGROUP) != 0)
786   {
787   newptrb.epb_saved_eptr = eptr;
788   newptrb.epb_prev = eptrb;
789   eptrb = &newptrb;
790   mb->match_function_type &= ~MATCH_CBEGROUP;
791   }
792 
793 /* Now, at last, we can start processing the opcodes. */
794 
795 for (;;)
796   {
797   minimize = possessive = FALSE;
798   op = *ecode;
799 
800   switch(op)
801     {
802     case OP_MARK:
803     mb->nomatch_mark = ecode + 2;
804     mb->mark = NULL;    /* In case previously set by assertion */
805     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, mb,
806       eptrb, RM55);
807     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
808          mb->mark == NULL) mb->mark = ecode + 2;
809 
810     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
811     argument, and we must check whether that argument matches this MARK's
812     argument. It is passed back in mb->start_match_ptr (an overloading of that
813     variable). If it does match, we reset that variable to the current subject
814     position and return MATCH_SKIP. Otherwise, pass back the return code
815     unaltered. */
816 
817     else if (rrc == MATCH_SKIP_ARG &&
818         PRIV(strcmp)(ecode + 2, mb->start_match_ptr) == 0)
819       {
820       mb->start_match_ptr = eptr;
821       RRETURN(MATCH_SKIP);
822       }
823     RRETURN(rrc);
824 
825     case OP_FAIL:
826     RRETURN(MATCH_NOMATCH);
827 
828     case OP_COMMIT:
829     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb,
830       eptrb, RM52);
831     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
832     RRETURN(MATCH_COMMIT);
833 
834     case OP_PRUNE:
835     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb,
836       eptrb, RM51);
837     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
838     RRETURN(MATCH_PRUNE);
839 
840     case OP_PRUNE_ARG:
841     mb->nomatch_mark = ecode + 2;
842     mb->mark = NULL;    /* In case previously set by assertion */
843     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, mb,
844       eptrb, RM56);
845     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
846          mb->mark == NULL) mb->mark = ecode + 2;
847     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
848     RRETURN(MATCH_PRUNE);
849 
850     case OP_SKIP:
851     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb,
852       eptrb, RM53);
853     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
854     mb->start_match_ptr = eptr;   /* Pass back current position */
855     RRETURN(MATCH_SKIP);
856 
857     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
858     nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
859     not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
860     that failed and any that precede it (either they also failed, or were not
861     triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
862     SKIP_ARG gets to top level, the match is re-run with mb->ignore_skip_arg
863     set to the count of the one that failed. */
864 
865     case OP_SKIP_ARG:
866     mb->skip_arg_count++;
867     if (mb->skip_arg_count <= mb->ignore_skip_arg)
868       {
869       ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
870       break;
871       }
872     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, mb,
873       eptrb, RM57);
874     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
875 
876     /* Pass back the current skip name by overloading mb->start_match_ptr and
877     returning the special MATCH_SKIP_ARG return code. This will either be
878     caught by a matching MARK, or get to the top, where it causes a rematch
879     with mb->ignore_skip_arg set to the value of mb->skip_arg_count. */
880 
881     mb->start_match_ptr = ecode + 2;
882     RRETURN(MATCH_SKIP_ARG);
883 
884     /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
885     the branch in which it occurs can be determined. Overload the start of
886     match pointer to do this. */
887 
888     case OP_THEN:
889     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb,
890       eptrb, RM54);
891     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
892     mb->start_match_ptr = ecode;
893     RRETURN(MATCH_THEN);
894 
895     case OP_THEN_ARG:
896     mb->nomatch_mark = ecode + 2;
897     mb->mark = NULL;    /* In case previously set by assertion */
898     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
899       mb, eptrb, RM58);
900     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
901          mb->mark == NULL) mb->mark = ecode + 2;
902     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
903     mb->start_match_ptr = ecode;
904     RRETURN(MATCH_THEN);
905 
906     /* Handle an atomic group that does not contain any capturing parentheses.
907     This can be handled like an assertion. Prior to 8.13, all atomic groups
908     were handled this way. In 8.13, the code was changed as below for ONCE, so
909     that backups pass through the group and thereby reset captured values.
910     However, this uses a lot more stack, so in 8.20, atomic groups that do not
911     contain any captures generate OP_ONCE_NC, which can be handled in the old,
912     less stack intensive way.
913 
914     Check the alternative branches in turn - the matching won't pass the KET
915     for this kind of subpattern. If any one branch matches, we carry on as at
916     the end of a normal bracket, leaving the subject pointer, but resetting
917     the start-of-match value in case it was changed by \K. */
918 
919     case OP_ONCE_NC:
920     prev = ecode;
921     saved_eptr = eptr;
922     save_mark = mb->mark;
923     do
924       {
925       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, mb, eptrb, RM64);
926       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
927         {
928         mstart = mb->start_match_ptr;
929         break;
930         }
931       if (rrc == MATCH_THEN)
932         {
933         next_ecode = ecode + GET(ecode,1);
934         if (mb->start_match_ptr < next_ecode &&
935             (*ecode == OP_ALT || *next_ecode == OP_ALT))
936           rrc = MATCH_NOMATCH;
937         }
938 
939       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
940       ecode += GET(ecode,1);
941       mb->mark = save_mark;
942       }
943     while (*ecode == OP_ALT);
944 
945     /* If we hit the end of the group (which could be repeated), fail */
946 
947     if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
948 
949     /* Continue as from after the group, updating the offsets high water
950     mark, since extracts may have been taken. */
951 
952     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
953 
954     offset_top = mb->end_offset_top;
955     eptr = mb->end_match_ptr;
956 
957     /* For a non-repeating ket, just continue at this level. This also
958     happens for a repeating ket if no characters were matched in the group.
959     This is the forcible breaking of infinite loops as implemented in Perl
960     5.005. */
961 
962     if (*ecode == OP_KET || eptr == saved_eptr)
963       {
964       ecode += 1+LINK_SIZE;
965       break;
966       }
967 
968     /* The repeating kets try the rest of the pattern or restart from the
969     preceding bracket, in the appropriate order. The second "call" of match()
970     uses tail recursion, to avoid using another stack frame. */
971 
972     if (*ecode == OP_KETRMIN)
973       {
974       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, mb, eptrb, RM65);
975       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
976       ecode = prev;
977       goto TAIL_RECURSE;
978       }
979     else  /* OP_KETRMAX */
980       {
981       RMATCH(eptr, prev, offset_top, mb, eptrb, RM66);
982       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
983       ecode += 1 + LINK_SIZE;
984       goto TAIL_RECURSE;
985       }
986     /* Control never gets here */
987 
988     /* Handle a capturing bracket, other than those that are possessive with an
989     unlimited repeat. If there is space in the offset vector, save the current
990     subject position in the working slot at the top of the vector. We mustn't
991     change the current values of the data slot, because they may be set from a
992     previous iteration of this group, and be referred to by a reference inside
993     the group. A failure to match might occur after the group has succeeded,
994     if something later on doesn't match. For this reason, we need to restore
995     the working value and also the values of the final offsets, in case they
996     were set by a previous iteration of the same bracket.
997 
998     If there isn't enough space in the offset vector, treat this as if it were
999     a non-capturing bracket. Don't worry about setting the flag for the error
1000     case here; that is handled in the code for KET. */
1001 
1002     case OP_CBRA:
1003     case OP_SCBRA:
1004     number = GET2(ecode, 1+LINK_SIZE);
1005     offset = number << 1;
1006 
1007     if (offset < mb->offset_max)
1008       {
1009       save_offset1 = mb->ovector[offset];
1010       save_offset2 = mb->ovector[offset+1];
1011       save_offset3 = mb->ovector[mb->offset_end - number];
1012       save_capture_last = mb->capture_last;
1013       save_mark = mb->mark;
1014 
1015       mb->ovector[mb->offset_end - number] = eptr - mb->start_subject;
1016 
1017       for (;;)
1018         {
1019         if (op >= OP_SBRA) mb->match_function_type |= MATCH_CBEGROUP;
1020         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb,
1021           eptrb, RM1);
1022         if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
1023 
1024         /* If we backed up to a THEN, check whether it is within the current
1025         branch by comparing the address of the THEN that is passed back with
1026         the end of the branch. If it is within the current branch, and the
1027         branch is one of two or more alternatives (it either starts or ends
1028         with OP_ALT), we have reached the limit of THEN's action, so convert
1029         the return code to NOMATCH, which will cause normal backtracking to
1030         happen from now on. Otherwise, THEN is passed back to an outer
1031         alternative. This implements Perl's treatment of parenthesized groups,
1032         where a group not containing | does not affect the current alternative,
1033         that is, (X) is NOT the same as (X|(*F)). */
1034 
1035         if (rrc == MATCH_THEN)
1036           {
1037           next_ecode = ecode + GET(ecode,1);
1038           if (mb->start_match_ptr < next_ecode &&
1039               (*ecode == OP_ALT || *next_ecode == OP_ALT))
1040             rrc = MATCH_NOMATCH;
1041           }
1042 
1043         /* Anything other than NOMATCH is passed back. */
1044 
1045         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1046         mb->capture_last = save_capture_last;
1047         ecode += GET(ecode, 1);
1048         mb->mark = save_mark;
1049         if (*ecode != OP_ALT) break;
1050         }
1051 
1052       mb->ovector[offset] = save_offset1;
1053       mb->ovector[offset+1] = save_offset2;
1054       mb->ovector[mb->offset_end - number] = save_offset3;
1055 
1056       /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
1057 
1058       RRETURN(rrc);
1059       }
1060 
1061     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1062     as a non-capturing bracket. */
1063 
1064     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1065     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1066 
1067     /* Non-capturing or atomic group, except for possessive with unlimited
1068     repeat and ONCE group with no captures. Loop for all the alternatives.
1069 
1070     When we get to the final alternative within the brackets, we used to return
1071     the result of a recursive call to match() whatever happened so it was
1072     possible to reduce stack usage by turning this into a tail recursion,
1073     except in the case of a possibly empty group. However, now that there is
1074     the possibility of (*THEN) occurring in the final alternative, this
1075     optimization is no longer always possible.
1076 
1077     We can optimize if we know there are no (*THEN)s in the pattern; at present
1078     this is the best that can be done.
1079 
1080     MATCH_ONCE is returned when the end of an atomic group is successfully
1081     reached, but subsequent matching fails. It passes back up the tree (causing
1082     captured values to be reset) until the original atomic group level is
1083     reached. This is tested by comparing mb->once_target with the start of the
1084     group. At this point, the return is converted into MATCH_NOMATCH so that
1085     previous backup points can be taken. */
1086 
1087     case OP_ONCE:
1088     case OP_BRA:
1089     case OP_SBRA:
1090 
1091     for (;;)
1092       {
1093       if (op >= OP_SBRA || op == OP_ONCE)
1094         mb->match_function_type |= MATCH_CBEGROUP;
1095 
1096       /* If this is not a possibly empty group, and there are no (*THEN)s in
1097       the pattern, and this is the final alternative, optimize as described
1098       above. */
1099 
1100       else if (!mb->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1101         {
1102         ecode += PRIV(OP_lengths)[*ecode];
1103         goto TAIL_RECURSE;
1104         }
1105 
1106       /* In all other cases, we have to make another call to match(). */
1107 
1108       save_mark = mb->mark;
1109       save_capture_last = mb->capture_last;
1110       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb, eptrb,
1111         RM2);
1112 
1113       /* See comment in the code for capturing groups above about handling
1114       THEN. */
1115 
1116       if (rrc == MATCH_THEN)
1117         {
1118         next_ecode = ecode + GET(ecode,1);
1119         if (mb->start_match_ptr < next_ecode &&
1120             (*ecode == OP_ALT || *next_ecode == OP_ALT))
1121           rrc = MATCH_NOMATCH;
1122         }
1123 
1124       if (rrc != MATCH_NOMATCH)
1125         {
1126         if (rrc == MATCH_ONCE)
1127           {
1128           PCRE2_SPTR scode = ecode;
1129           if (*scode != OP_ONCE)           /* If not at start, find it */
1130             {
1131             while (*scode == OP_ALT) scode += GET(scode, 1);
1132             scode -= GET(scode, 1);
1133             }
1134           if (mb->once_target == scode) rrc = MATCH_NOMATCH;
1135           }
1136         RRETURN(rrc);
1137         }
1138       ecode += GET(ecode, 1);
1139       mb->mark = save_mark;
1140       if (*ecode != OP_ALT) break;
1141       mb->capture_last = save_capture_last;
1142       }
1143 
1144     RRETURN(MATCH_NOMATCH);
1145 
1146     /* Handle possessive capturing brackets with an unlimited repeat. We come
1147     here from BRAZERO with allow_zero set TRUE. The ovector values are
1148     handled similarly to the normal case above. However, the matching is
1149     different. The end of these brackets will always be OP_KETRPOS, which
1150     returns MATCH_KETRPOS without going further in the pattern. By this means
1151     we can handle the group by iteration rather than recursion, thereby
1152     reducing the amount of stack needed. If the ovector is too small for
1153     capturing, treat as non-capturing. */
1154 
1155     case OP_CBRAPOS:
1156     case OP_SCBRAPOS:
1157     allow_zero = FALSE;
1158 
1159     POSSESSIVE_CAPTURE:
1160     number = GET2(ecode, 1+LINK_SIZE);
1161     offset = number << 1;
1162     if (offset >= mb->offset_max) goto POSSESSIVE_NON_CAPTURE;
1163 
1164     matched_once = FALSE;
1165     code_offset = (int)(ecode - mb->start_code);
1166 
1167     save_offset1 = mb->ovector[offset];
1168     save_offset2 = mb->ovector[offset+1];
1169     save_offset3 = mb->ovector[mb->offset_end - number];
1170     save_capture_last = mb->capture_last;
1171 
1172     /* Each time round the loop, save the current subject position for use
1173     when the group matches. For MATCH_MATCH, the group has matched, so we
1174     restart it with a new subject starting position, remembering that we had
1175     at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
1176     usual. If we haven't matched any alternatives in any iteration, check to
1177     see if a previous iteration matched. If so, the group has matched;
1178     continue from afterwards. Otherwise it has failed; restore the previous
1179     capture values before returning NOMATCH. */
1180 
1181     for (;;)
1182       {
1183       mb->ovector[mb->offset_end - number] = eptr - mb->start_subject;
1184       if (op >= OP_SBRA) mb->match_function_type |= MATCH_CBEGROUP;
1185       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb,
1186         eptrb, RM63);
1187       if (rrc == MATCH_KETRPOS)
1188         {
1189         offset_top = mb->end_offset_top;
1190         ecode = mb->start_code + code_offset;
1191         save_capture_last = mb->capture_last;
1192         matched_once = TRUE;
1193         mstart = mb->start_match_ptr;    /* In case \K changed it */
1194         if (eptr == mb->end_match_ptr)   /* Matched an empty string */
1195           {
1196           do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1197           break;
1198           }
1199         eptr = mb->end_match_ptr;
1200         continue;
1201         }
1202 
1203       /* See comment in the code for capturing groups above about handling
1204       THEN. */
1205 
1206       if (rrc == MATCH_THEN)
1207         {
1208         next_ecode = ecode + GET(ecode,1);
1209         if (mb->start_match_ptr < next_ecode &&
1210             (*ecode == OP_ALT || *next_ecode == OP_ALT))
1211           rrc = MATCH_NOMATCH;
1212         }
1213 
1214       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1215       mb->capture_last = save_capture_last;
1216       ecode += GET(ecode, 1);
1217       if (*ecode != OP_ALT) break;
1218       }
1219 
1220     if (!matched_once)
1221       {
1222       mb->ovector[offset] = save_offset1;
1223       mb->ovector[offset+1] = save_offset2;
1224       mb->ovector[mb->offset_end - number] = save_offset3;
1225       }
1226 
1227     if (allow_zero || matched_once)
1228       {
1229       ecode += 1 + LINK_SIZE;
1230       break;
1231       }
1232     RRETURN(MATCH_NOMATCH);
1233 
1234     /* Non-capturing possessive bracket with unlimited repeat. We come here
1235     from BRAZERO with allow_zero = TRUE. The code is similar to the above,
1236     without the capturing complication. It is written out separately for speed
1237     and cleanliness. */
1238 
1239     case OP_BRAPOS:
1240     case OP_SBRAPOS:
1241     allow_zero = FALSE;
1242 
1243     POSSESSIVE_NON_CAPTURE:
1244     matched_once = FALSE;
1245     code_offset = (int)(ecode - mb->start_code);
1246     save_capture_last = mb->capture_last;
1247 
1248     for (;;)
1249       {
1250       if (op >= OP_SBRA) mb->match_function_type |= MATCH_CBEGROUP;
1251       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb,
1252         eptrb, RM48);
1253       if (rrc == MATCH_KETRPOS)
1254         {
1255         offset_top = mb->end_offset_top;
1256         ecode = mb->start_code + code_offset;
1257         matched_once = TRUE;
1258         mstart = mb->start_match_ptr;   /* In case \K reset it */
1259         if (eptr == mb->end_match_ptr)  /* Matched an empty string */
1260           {
1261           do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1262           break;
1263           }
1264         eptr = mb->end_match_ptr;
1265         continue;
1266         }
1267 
1268       /* See comment in the code for capturing groups above about handling
1269       THEN. */
1270 
1271       if (rrc == MATCH_THEN)
1272         {
1273         next_ecode = ecode + GET(ecode,1);
1274         if (mb->start_match_ptr < next_ecode &&
1275             (*ecode == OP_ALT || *next_ecode == OP_ALT))
1276           rrc = MATCH_NOMATCH;
1277         }
1278 
1279       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1280       ecode += GET(ecode, 1);
1281       if (*ecode != OP_ALT) break;
1282       mb->capture_last = save_capture_last;
1283       }
1284 
1285     if (matched_once || allow_zero)
1286       {
1287       ecode += 1 + LINK_SIZE;
1288       break;
1289       }
1290     RRETURN(MATCH_NOMATCH);
1291 
1292     /* Control never reaches here. */
1293 
1294     /* Conditional group: compilation checked that there are no more than two
1295     branches. If the condition is false, skipping the first branch takes us
1296     past the end of the item if there is only one branch, but that's exactly
1297     what we want. */
1298 
1299     case OP_COND:
1300     case OP_SCOND:
1301 
1302     /* The variable codelink will be added to ecode when the condition is
1303     false, to get to the second branch. Setting it to the offset to the ALT
1304     or KET, then incrementing ecode achieves this effect. We now have ecode
1305     pointing to the condition or callout. */
1306 
1307     codelink = GET(ecode, 1);   /* Offset to the second branch */
1308     ecode += 1 + LINK_SIZE;     /* From this opcode */
1309 
1310     /* Because of the way auto-callout works during compile, a callout item is
1311     inserted between OP_COND and an assertion condition. */
1312 
1313     if (*ecode == OP_CALLOUT || *ecode == OP_CALLOUT_STR)
1314       {
1315       unsigned int callout_length = (*ecode == OP_CALLOUT)
1316           ? PRIV(OP_lengths)[OP_CALLOUT] : GET(ecode, 1 + 2*LINK_SIZE);
1317 
1318       if (mb->callout != NULL)
1319         {
1320         pcre2_callout_block cb;
1321         cb.version          = 1;
1322         cb.capture_top      = offset_top/2;
1323         cb.capture_last     = mb->capture_last & CAPLMASK;
1324         cb.offset_vector    = mb->ovector;
1325         cb.mark             = mb->nomatch_mark;
1326         cb.subject          = mb->start_subject;
1327         cb.subject_length   = (PCRE2_SIZE)(mb->end_subject - mb->start_subject);
1328         cb.start_match      = (PCRE2_SIZE)(mstart - mb->start_subject);
1329         cb.current_position = (PCRE2_SIZE)(eptr - mb->start_subject);
1330         cb.pattern_position = GET(ecode, 1);
1331         cb.next_item_length = GET(ecode, 1 + LINK_SIZE);
1332 
1333         if (*ecode == OP_CALLOUT)
1334           {
1335           cb.callout_number = ecode[1 + 2*LINK_SIZE];
1336           cb.callout_string_offset = 0;
1337           cb.callout_string = NULL;
1338           cb.callout_string_length = 0;
1339           }
1340         else
1341           {
1342           cb.callout_number = 0;
1343           cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE);
1344           cb.callout_string = ecode + (1 + 4*LINK_SIZE) + 1;
1345           cb.callout_string_length =
1346             callout_length - (1 + 4*LINK_SIZE) - 2;
1347           }
1348 
1349         if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)
1350           RRETURN(MATCH_NOMATCH);
1351         if (rrc < 0) RRETURN(rrc);
1352         }
1353 
1354       /* Advance ecode past the callout, so it now points to the condition. We
1355       must adjust codelink so that the value of ecode+codelink is unchanged. */
1356 
1357       ecode += callout_length;
1358       codelink -= callout_length;
1359       }
1360 
1361     /* Test the various possible conditions */
1362 
1363     condition = FALSE;
1364     switch(condcode = *ecode)
1365       {
1366       case OP_RREF:                  /* Numbered group recursion test */
1367       if (mb->recursive != NULL)     /* Not recursing => FALSE */
1368         {
1369         uint32_t recno = GET2(ecode, 1);   /* Recursion group number*/
1370         condition = (recno == RREF_ANY || recno == mb->recursive->group_num);
1371         }
1372       break;
1373 
1374       case OP_DNRREF:       /* Duplicate named group recursion test */
1375       if (mb->recursive != NULL)
1376         {
1377         int count = GET2(ecode, 1 + IMM2_SIZE);
1378         PCRE2_SPTR slot = mb->name_table + GET2(ecode, 1) * mb->name_entry_size;
1379         while (count-- > 0)
1380           {
1381           uint32_t recno = GET2(slot, 0);
1382           condition = recno == mb->recursive->group_num;
1383           if (condition) break;
1384           slot += mb->name_entry_size;
1385           }
1386         }
1387       break;
1388 
1389       case OP_CREF:                  /* Numbered group used test */
1390       offset = GET2(ecode, 1) << 1;  /* Doubled ref number */
1391       condition = offset < offset_top &&
1392         mb->ovector[offset] != PCRE2_UNSET;
1393       break;
1394 
1395       case OP_DNCREF:      /* Duplicate named group used test */
1396         {
1397         int count = GET2(ecode, 1 + IMM2_SIZE);
1398         PCRE2_SPTR slot = mb->name_table + GET2(ecode, 1) * mb->name_entry_size;
1399         while (count-- > 0)
1400           {
1401           offset = GET2(slot, 0) << 1;
1402           condition = offset < offset_top &&
1403             mb->ovector[offset] != PCRE2_UNSET;
1404           if (condition) break;
1405           slot += mb->name_entry_size;
1406           }
1407         }
1408       break;
1409 
1410       case OP_FALSE:
1411       case OP_FAIL:   /* The assertion (?!) becomes OP_FAIL */
1412       break;
1413 
1414       case OP_TRUE:
1415       condition = TRUE;
1416       break;
1417 
1418       /* The condition is an assertion. Call match() to evaluate it - setting
1419       the MATCH_CONDASSERT bit in mb->match_function_type causes it to stop at
1420       the end of an assertion. */
1421 
1422       default:
1423       mb->match_function_type |= MATCH_CONDASSERT;
1424       RMATCH(eptr, ecode, offset_top, mb, NULL, RM3);
1425       if (rrc == MATCH_MATCH)
1426         {
1427         if (mb->end_offset_top > offset_top)
1428           offset_top = mb->end_offset_top;  /* Captures may have happened */
1429         condition = TRUE;
1430 
1431         /* Advance ecode past the assertion to the start of the first branch,
1432         but adjust it so that the general choosing code below works. If the
1433         assertion has a quantifier that allows zero repeats we must skip over
1434         the BRAZERO. This is a lunatic thing to do, but somebody did! */
1435 
1436         if (*ecode == OP_BRAZERO) ecode++;
1437         ecode += GET(ecode, 1);
1438         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1439         ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
1440         }
1441 
1442       /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1443       assertion; it is therefore treated as NOMATCH. Any other return is an
1444       error. */
1445 
1446       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1447         {
1448         RRETURN(rrc);         /* Need braces because of following else */
1449         }
1450       break;
1451       }
1452 
1453     /* Choose branch according to the condition */
1454 
1455     ecode += condition? PRIV(OP_lengths)[condcode] : codelink;
1456 
1457     /* We are now at the branch that is to be obeyed. As there is only one, we
1458     can use tail recursion to avoid using another stack frame, except when
1459     there is unlimited repeat of a possibly empty group. In the latter case, a
1460     recursive call to match() is always required, unless the second alternative
1461     doesn't exist, in which case we can just plough on. Note that, for
1462     compatibility with Perl, the | in a conditional group is NOT treated as
1463     creating two alternatives. If a THEN is encountered in the branch, it
1464     propagates out to the enclosing alternative (unless nested in a deeper set
1465     of alternatives, of course). */
1466 
1467     if (condition || ecode[-(1+LINK_SIZE)] == OP_ALT)
1468       {
1469       if (op != OP_SCOND)
1470         {
1471         goto TAIL_RECURSE;
1472         }
1473 
1474       mb->match_function_type |= MATCH_CBEGROUP;
1475       RMATCH(eptr, ecode, offset_top, mb, eptrb, RM49);
1476       RRETURN(rrc);
1477       }
1478 
1479      /* Condition false & no alternative; continue after the group. */
1480 
1481     else
1482       {
1483       }
1484     break;
1485 
1486 
1487     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1488     to close any currently open capturing brackets. */
1489 
1490     case OP_CLOSE:
1491     number = GET2(ecode, 1);   /* Must be less than 65536 */
1492     offset = number << 1;
1493     mb->capture_last = (mb->capture_last & OVFLMASK) | number;
1494     if (offset >= mb->offset_max) mb->capture_last |= OVFLBIT; else
1495       {
1496       mb->ovector[offset] =
1497         mb->ovector[mb->offset_end - number];
1498       mb->ovector[offset+1] = eptr - mb->start_subject;
1499 
1500       /* If this group is at or above the current highwater mark, ensure that
1501       any groups between the current high water mark and this group are marked
1502       unset and then update the high water mark. */
1503 
1504       if (offset >= offset_top)
1505         {
1506         register PCRE2_SIZE *iptr = mb->ovector + offset_top;
1507         register PCRE2_SIZE *iend = mb->ovector + offset;
1508         while (iptr < iend) *iptr++ = PCRE2_UNSET;
1509         offset_top = offset + 2;
1510         }
1511       }
1512     ecode += 1 + IMM2_SIZE;
1513     break;
1514 
1515 
1516     /* End of the pattern, either real or forced. In an assertion ACCEPT,
1517     update the last used pointer. */
1518 
1519     case OP_ASSERT_ACCEPT:
1520     if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
1521 
1522     case OP_ACCEPT:
1523     case OP_END:
1524 
1525     /* If we have matched an empty string, fail if not in an assertion and not
1526     in a recursion if either PCRE2_NOTEMPTY is set, or if PCRE2_NOTEMPTY_ATSTART
1527     is set and we have matched at the start of the subject. In both cases,
1528     backtracking will then try other alternatives, if any. */
1529 
1530     if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1531          mb->recursive == NULL &&
1532          ((mb->moptions & PCRE2_NOTEMPTY) != 0 ||
1533            ((mb->moptions & PCRE2_NOTEMPTY_ATSTART) != 0 &&
1534              mstart == mb->start_subject + mb->start_offset)))
1535       RRETURN(MATCH_NOMATCH);
1536 
1537     /* Otherwise, we have a match. */
1538 
1539     mb->end_match_ptr = eptr;           /* Record where we ended */
1540     mb->end_offset_top = offset_top;    /* and how many extracts were taken */
1541     mb->start_match_ptr = mstart;       /* and the start (\K can modify) */
1542 
1543     /* For some reason, the macros don't work properly if an expression is
1544     given as the argument to RRETURN when the heap is in use. */
1545 
1546     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1547     RRETURN(rrc);
1548 
1549     /* Assertion brackets. Check the alternative branches in turn - the
1550     matching won't pass the KET for an assertion. If any one branch matches,
1551     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1552     start of each branch to move the current point backwards, so the code at
1553     this level is identical to the lookahead case. When the assertion is part
1554     of a condition, we want to return immediately afterwards. The caller of
1555     this incarnation of the match() function will have set MATCH_CONDASSERT in
1556     mb->match_function_type, and one of these opcodes will be the first opcode
1557     that is processed. We use a local variable that is preserved over calls to
1558     match() to remember this case. */
1559 
1560     case OP_ASSERT:
1561     case OP_ASSERTBACK:
1562     save_mark = mb->mark;
1563     if ((mb->match_function_type & MATCH_CONDASSERT) != 0)
1564       {
1565       condassert = TRUE;
1566       mb->match_function_type &= ~MATCH_CONDASSERT;
1567       }
1568     else condassert = FALSE;
1569 
1570     /* Loop for each branch */
1571 
1572     do
1573       {
1574       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, mb, NULL, RM4);
1575 
1576       /* A match means that the assertion is true; break out of the loop
1577       that matches its alternatives. */
1578 
1579       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1580         {
1581         mstart = mb->start_match_ptr;   /* In case \K reset it */
1582         break;
1583         }
1584 
1585       /* If not matched, restore the previous mark setting. */
1586 
1587       mb->mark = save_mark;
1588 
1589       /* See comment in the code for capturing groups above about handling
1590       THEN. */
1591 
1592       if (rrc == MATCH_THEN)
1593         {
1594         next_ecode = ecode + GET(ecode,1);
1595         if (mb->start_match_ptr < next_ecode &&
1596             (*ecode == OP_ALT || *next_ecode == OP_ALT))
1597           rrc = MATCH_NOMATCH;
1598         }
1599 
1600       /* Anything other than NOMATCH causes the entire assertion to fail,
1601       passing back the return code. This includes COMMIT, SKIP, PRUNE and an
1602       uncaptured THEN, which means they take their normal effect. This
1603       consistent approach does not always have exactly the same effect as in
1604       Perl. */
1605 
1606       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1607       ecode += GET(ecode, 1);
1608       }
1609     while (*ecode == OP_ALT);   /* Continue for next alternative */
1610 
1611     /* If we have tried all the alternative branches, the assertion has
1612     failed. If not, we broke out after a match. */
1613 
1614     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1615 
1616     /* If checking an assertion for a condition, return MATCH_MATCH. */
1617 
1618     if (condassert) RRETURN(MATCH_MATCH);
1619 
1620     /* Continue from after a successful assertion, updating the offsets high
1621     water mark, since extracts may have been taken during the assertion. */
1622 
1623     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1624     ecode += 1 + LINK_SIZE;
1625     offset_top = mb->end_offset_top;
1626     continue;
1627 
1628     /* Negative assertion: all branches must fail to match for the assertion to
1629     succeed. */
1630 
1631     case OP_ASSERT_NOT:
1632     case OP_ASSERTBACK_NOT:
1633     save_mark = mb->mark;
1634     if ((mb->match_function_type & MATCH_CONDASSERT) != 0)
1635       {
1636       condassert = TRUE;
1637       mb->match_function_type &= ~MATCH_CONDASSERT;
1638       }
1639     else condassert = FALSE;
1640 
1641     /* Loop for each alternative branch. */
1642 
1643     do
1644       {
1645       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, mb, NULL, RM5);
1646       mb->mark = save_mark;   /* Always restore the mark setting */
1647 
1648       switch(rrc)
1649         {
1650         case MATCH_MATCH:            /* A successful match means */
1651         case MATCH_ACCEPT:           /* the assertion has failed. */
1652         RRETURN(MATCH_NOMATCH);
1653 
1654         case MATCH_NOMATCH:          /* Carry on with next branch */
1655         break;
1656 
1657         /* See comment in the code for capturing groups above about handling
1658         THEN. */
1659 
1660         case MATCH_THEN:
1661         next_ecode = ecode + GET(ecode,1);
1662         if (mb->start_match_ptr < next_ecode &&
1663             (*ecode == OP_ALT || *next_ecode == OP_ALT))
1664           {
1665           rrc = MATCH_NOMATCH;
1666           break;
1667           }
1668         /* Otherwise fall through. */
1669 
1670         /* COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole
1671         assertion to fail to match, without considering any more alternatives.
1672         Failing to match means the assertion is true. This is a consistent
1673         approach, but does not always have the same effect as in Perl. */
1674 
1675         case MATCH_COMMIT:
1676         case MATCH_SKIP:
1677         case MATCH_SKIP_ARG:
1678         case MATCH_PRUNE:
1679         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1680         goto NEG_ASSERT_TRUE;   /* Break out of alternation loop */
1681 
1682         /* Anything else is an error */
1683 
1684         default:
1685         RRETURN(rrc);
1686         }
1687 
1688       /* Continue with next branch */
1689 
1690       ecode += GET(ecode,1);
1691       }
1692     while (*ecode == OP_ALT);
1693 
1694     /* All branches in the assertion failed to match. */
1695 
1696     NEG_ASSERT_TRUE:
1697     if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1698     ecode += 1 + LINK_SIZE;                /* Continue with current branch */
1699     continue;
1700 
1701     /* Move the subject pointer back. This occurs only at the start of
1702     each branch of a lookbehind assertion. If we are too close to the start to
1703     move back, this match function fails. When working with UTF-8 we move
1704     back a number of characters, not bytes. */
1705 
1706     case OP_REVERSE:
1707     i = GET(ecode, 1);
1708 #ifdef SUPPORT_UNICODE
1709     if (utf)
1710       {
1711       while (i-- > 0)
1712         {
1713         if (eptr <= mb->start_subject) RRETURN(MATCH_NOMATCH);
1714         eptr--;
1715         BACKCHAR(eptr);
1716         }
1717       }
1718     else
1719 #endif
1720 
1721     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1722 
1723       {
1724       if (i > eptr - mb->start_subject) RRETURN(MATCH_NOMATCH);
1725       eptr -= i;
1726       }
1727 
1728     /* Save the earliest consulted character, then skip to next op code */
1729 
1730     if (eptr < mb->start_used_ptr) mb->start_used_ptr = eptr;
1731     ecode += 1 + LINK_SIZE;
1732     break;
1733 
1734     /* The callout item calls an external function, if one is provided, passing
1735     details of the match so far. This is mainly for debugging, though the
1736     function is able to force a failure. */
1737 
1738     case OP_CALLOUT:
1739     case OP_CALLOUT_STR:
1740       {
1741       unsigned int callout_length = (*ecode == OP_CALLOUT)
1742           ? PRIV(OP_lengths)[OP_CALLOUT] : GET(ecode, 1 + 2*LINK_SIZE);
1743 
1744       if (mb->callout != NULL)
1745         {
1746         pcre2_callout_block cb;
1747         cb.version          = 1;
1748         cb.callout_number   = ecode[LINK_SIZE + 1];
1749         cb.capture_top      = offset_top/2;
1750         cb.capture_last     = mb->capture_last & CAPLMASK;
1751         cb.offset_vector    = mb->ovector;
1752         cb.mark             = mb->nomatch_mark;
1753         cb.subject          = mb->start_subject;
1754         cb.subject_length   = (PCRE2_SIZE)(mb->end_subject - mb->start_subject);
1755         cb.start_match      = (PCRE2_SIZE)(mstart - mb->start_subject);
1756         cb.current_position = (PCRE2_SIZE)(eptr - mb->start_subject);
1757         cb.pattern_position = GET(ecode, 1);
1758         cb.next_item_length = GET(ecode, 1 + LINK_SIZE);
1759 
1760         if (*ecode == OP_CALLOUT)
1761           {
1762           cb.callout_number = ecode[1 + 2*LINK_SIZE];
1763           cb.callout_string_offset = 0;
1764           cb.callout_string = NULL;
1765           cb.callout_string_length = 0;
1766           }
1767         else
1768           {
1769           cb.callout_number = 0;
1770           cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE);
1771           cb.callout_string = ecode + (1 + 4*LINK_SIZE) + 1;
1772           cb.callout_string_length =
1773             callout_length - (1 + 4*LINK_SIZE) - 2;
1774           }
1775 
1776         if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)
1777           RRETURN(MATCH_NOMATCH);
1778         if (rrc < 0) RRETURN(rrc);
1779         }
1780       ecode += callout_length;
1781       }
1782     break;
1783 
1784     /* Recursion either matches the current regex, or some subexpression. The
1785     offset data is the offset to the starting bracket from the start of the
1786     whole pattern. (This is so that it works from duplicated subpatterns.)
1787 
1788     The state of the capturing groups is preserved over recursion, and
1789     re-instated afterwards. We don't know how many are started and not yet
1790     finished (offset_top records the completed total) so we just have to save
1791     all the potential data. There may be up to 65535 such values, which is too
1792     large to put on the stack, but using malloc for small numbers seems
1793     expensive. As a compromise, the stack is used when there are no more than
1794     OP_RECURSE_STACK_SAVE_MAX values to store; otherwise malloc is used.
1795 
1796     There are also other values that have to be saved. We use a chained
1797     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1798     for the original version of this logic. It has, however, been hacked around
1799     a lot, so he is not to blame for the current way it works. */
1800 
1801     case OP_RECURSE:
1802       {
1803       ovecsave_frame *fr;
1804       recursion_info *ri;
1805       uint32_t recno;
1806 
1807       callpat = mb->start_code + GET(ecode, 1);
1808       recno = (callpat == mb->start_code)? 0 : GET2(callpat, 1 + LINK_SIZE);
1809 
1810       /* Check for repeating a pattern recursion without advancing the subject
1811       pointer. This should catch convoluted mutual recursions. (Some simple
1812       cases are caught at compile time.) */
1813 
1814       for (ri = mb->recursive; ri != NULL; ri = ri->prevrec)
1815         if (recno == ri->group_num && eptr == ri->subject_position)
1816           RRETURN(PCRE2_ERROR_RECURSELOOP);
1817 
1818       /* Add to "recursing stack" */
1819 
1820       new_recursive.group_num = recno;
1821       new_recursive.saved_capture_last = mb->capture_last;
1822       new_recursive.subject_position = eptr;
1823       new_recursive.prevrec = mb->recursive;
1824       mb->recursive = &new_recursive;
1825 
1826       /* Where to continue from afterwards */
1827 
1828       ecode += 1 + LINK_SIZE;
1829 
1830       /* When we are using the system stack for match() recursion we can call a
1831       function that uses the system stack for preserving the ovector while
1832       processing the pattern recursion, but only if the ovector is small
1833       enough. */
1834 
1835 #ifndef HEAP_MATCH_RECURSE
1836       if (mb->offset_end <= OP_RECURSE_STACK_SAVE_MAX)
1837         {
1838         rrc = op_recurse_ovecsave(eptr, callpat, mstart, offset_top, mb,
1839           eptrb, rdepth);
1840         mb->recursive = new_recursive.prevrec;
1841         if (rrc != MATCH_MATCH && rrc != MATCH_ACCEPT) RRETURN(rrc);
1842 
1843         /* Set where we got to in the subject, and reset the start, in case
1844         it was changed by \K. This *is* propagated back out of a recursion,
1845         for Perl compatibility. */
1846 
1847         eptr = mb->end_match_ptr;
1848         mstart = mb->start_match_ptr;
1849         break;   /* End of processing OP_RECURSE */
1850         }
1851 #endif
1852       /* If the ovector is too big, or if we are using the heap for match()
1853       recursion, we have to use the heap for saving the ovector. Used ovecsave
1854       frames are kept on a chain and re-used. This makes a small improvement in
1855       execution time on Linux. */
1856 
1857       if (mb->ovecsave_chain != NULL)
1858         {
1859         new_recursive.ovec_save = mb->ovecsave_chain->saved_ovec;
1860         mb->ovecsave_chain = mb->ovecsave_chain->next;
1861         }
1862       else
1863         {
1864         fr = (ovecsave_frame *)(mb->memctl.malloc(sizeof(ovecsave_frame *) +
1865           mb->offset_end * sizeof(PCRE2_SIZE), mb->memctl.memory_data));
1866         if (fr == NULL) RRETURN(PCRE2_ERROR_NOMEMORY);
1867         new_recursive.ovec_save = fr->saved_ovec;
1868         }
1869 
1870       memcpy(new_recursive.ovec_save, mb->ovector,
1871         mb->offset_end * sizeof(PCRE2_SIZE));
1872 
1873       /* Do the recursion. After processing each alternative, restore the
1874       ovector data and the last captured value. This code has the same overall
1875       logic as the code in the op_recurse_ovecsave() function, but is adapted
1876       to use RMATCH/RRETURN and to release the heap block containing the saved
1877       ovector. */
1878 
1879       cbegroup = (*callpat >= OP_SBRA);
1880       do
1881         {
1882         if (cbegroup) mb->match_function_type |= MATCH_CBEGROUP;
1883         RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1884           mb, eptrb, RM6);
1885         memcpy(mb->ovector, new_recursive.ovec_save,
1886             mb->offset_end * sizeof(PCRE2_SIZE));
1887         mb->capture_last = new_recursive.saved_capture_last;
1888         mb->recursive = new_recursive.prevrec;
1889 
1890         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1891           {
1892           fr = (ovecsave_frame *)
1893             ((uint8_t *)new_recursive.ovec_save - sizeof(ovecsave_frame *));
1894           fr->next = mb->ovecsave_chain;
1895           mb->ovecsave_chain = fr;
1896 
1897           /* Set where we got to in the subject, and reset the start, in case
1898           it was changed by \K. This *is* propagated back out of a recursion,
1899           for Perl compatibility. */
1900 
1901           eptr = mb->end_match_ptr;
1902           mstart = mb->start_match_ptr;
1903           goto RECURSION_MATCHED;        /* Exit loop; end processing */
1904           }
1905 
1906         /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
1907         recursion; they cause a NOMATCH for the entire recursion. These codes
1908         are defined in a range that can be tested for. */
1909 
1910         if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
1911           {
1912           rrc = MATCH_NOMATCH;
1913           goto RECURSION_RETURN;
1914           }
1915 
1916         /* Any return code other than NOMATCH is an error. */
1917 
1918         if (rrc != MATCH_NOMATCH) goto RECURSION_RETURN;
1919         mb->recursive = &new_recursive;
1920         callpat += GET(callpat, 1);
1921         }
1922       while (*callpat == OP_ALT);
1923 
1924       RECURSION_RETURN:
1925       mb->recursive = new_recursive.prevrec;
1926       fr = (ovecsave_frame *)
1927         ((uint8_t *)new_recursive.ovec_save - sizeof(ovecsave_frame *));
1928       fr->next = mb->ovecsave_chain;
1929       mb->ovecsave_chain = fr;
1930       RRETURN(rrc);
1931       }
1932 
1933     RECURSION_MATCHED:
1934     break;
1935 
1936     /* An alternation is the end of a branch; scan along to find the end of the
1937     bracketed group and go to there. */
1938 
1939     case OP_ALT:
1940     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1941     break;
1942 
1943     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1944     indicating that it may occur zero times. It may repeat infinitely, or not
1945     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1946     with fixed upper repeat limits are compiled as a number of copies, with the
1947     optional ones preceded by BRAZERO or BRAMINZERO. */
1948 
1949     case OP_BRAZERO:
1950     next_ecode = ecode + 1;
1951     RMATCH(eptr, next_ecode, offset_top, mb, eptrb, RM10);
1952     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1953     do next_ecode += GET(next_ecode, 1); while (*next_ecode == OP_ALT);
1954     ecode = next_ecode + 1 + LINK_SIZE;
1955     break;
1956 
1957     case OP_BRAMINZERO:
1958     next_ecode = ecode + 1;
1959     do next_ecode += GET(next_ecode, 1); while (*next_ecode == OP_ALT);
1960     RMATCH(eptr, next_ecode + 1+LINK_SIZE, offset_top, mb, eptrb, RM11);
1961     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1962     ecode++;
1963     break;
1964 
1965     case OP_SKIPZERO:
1966     next_ecode = ecode+1;
1967     do next_ecode += GET(next_ecode,1); while (*next_ecode == OP_ALT);
1968     ecode = next_ecode + 1 + LINK_SIZE;
1969     break;
1970 
1971     /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
1972     here; just jump to the group, with allow_zero set TRUE. */
1973 
1974     case OP_BRAPOSZERO:
1975     op = *(++ecode);
1976     allow_zero = TRUE;
1977     if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
1978       goto POSSESSIVE_NON_CAPTURE;
1979 
1980     /* End of a group, repeated or non-repeating. */
1981 
1982     case OP_KET:
1983     case OP_KETRMIN:
1984     case OP_KETRMAX:
1985     case OP_KETRPOS:
1986     prev = ecode - GET(ecode, 1);
1987 
1988     /* If this was a group that remembered the subject start, in order to break
1989     infinite repeats of empty string matches, retrieve the subject start from
1990     the chain. Otherwise, set it NULL. */
1991 
1992     if (*prev >= OP_SBRA || *prev == OP_ONCE)
1993       {
1994       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1995       eptrb = eptrb->epb_prev;              /* Backup to previous group */
1996       }
1997     else saved_eptr = NULL;
1998 
1999     /* If we are at the end of an assertion group or a non-capturing atomic
2000     group, stop matching and return MATCH_MATCH, but record the current high
2001     water mark for use by positive assertions. We also need to record the match
2002     start in case it was changed by \K. */
2003 
2004     if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
2005          *prev == OP_ONCE_NC)
2006       {
2007       mb->end_match_ptr = eptr;      /* For ONCE_NC */
2008       mb->end_offset_top = offset_top;
2009       mb->start_match_ptr = mstart;
2010       if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
2011       RRETURN(MATCH_MATCH);         /* Sets mb->mark */
2012       }
2013 
2014     /* For capturing groups we have to check the group number back at the start
2015     and if necessary complete handling an extraction by setting the offsets and
2016     bumping the high water mark. Whole-pattern recursion is coded as a recurse
2017     into group 0, so it won't be picked up here. Instead, we catch it when the
2018     OP_END is reached. Other recursion is handled here. We just have to record
2019     the current subject position and start match pointer and give a MATCH
2020     return. */
2021 
2022     if (*prev == OP_CBRA || *prev == OP_SCBRA ||
2023         *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
2024       {
2025       number = GET2(prev, 1+LINK_SIZE);
2026       offset = number << 1;
2027 
2028       /* Handle a recursively called group. */
2029 
2030       if (mb->recursive != NULL && mb->recursive->group_num == number)
2031         {
2032         mb->end_match_ptr = eptr;
2033         mb->start_match_ptr = mstart;
2034         if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
2035         RRETURN(MATCH_MATCH);
2036         }
2037 
2038       /* Deal with capturing */
2039 
2040       mb->capture_last = (mb->capture_last & OVFLMASK) | number;
2041       if (offset >= mb->offset_max) mb->capture_last |= OVFLBIT; else
2042         {
2043         /* If offset is greater than offset_top, it means that we are
2044         "skipping" a capturing group, and that group's offsets must be marked
2045         unset. In earlier versions of PCRE, all the offsets were unset at the
2046         start of matching, but this doesn't work because atomic groups and
2047         assertions can cause a value to be set that should later be unset.
2048         Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
2049         part of the atomic group, but this is not on the final matching path,
2050         so must be unset when 2 is set. (If there is no group 2, there is no
2051         problem, because offset_top will then be 2, indicating no capture.) */
2052 
2053         if (offset > offset_top)
2054           {
2055           register PCRE2_SIZE *iptr = mb->ovector + offset_top;
2056           register PCRE2_SIZE *iend = mb->ovector + offset;
2057           while (iptr < iend) *iptr++ = PCRE2_UNSET;
2058           }
2059 
2060         /* Now make the extraction */
2061 
2062         mb->ovector[offset] = mb->ovector[mb->offset_end - number];
2063         mb->ovector[offset+1] = eptr - mb->start_subject;
2064         if (offset_top <= offset) offset_top = offset + 2;
2065         }
2066       }
2067 
2068     /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
2069     and return the MATCH_KETRPOS. This makes it possible to do the repeats one
2070     at a time from the outer level, thus saving stack. This must precede the
2071     empty string test - in this case that test is done at the outer level. */
2072 
2073     if (*ecode == OP_KETRPOS)
2074       {
2075       mb->start_match_ptr = mstart;    /* In case \K reset it */
2076       mb->end_match_ptr = eptr;
2077       mb->end_offset_top = offset_top;
2078       if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
2079       RRETURN(MATCH_KETRPOS);
2080       }
2081 
2082     /* For an ordinary non-repeating ket, just continue at this level. This
2083     also happens for a repeating ket if no characters were matched in the
2084     group. This is the forcible breaking of infinite loops as implemented in
2085     Perl 5.005. For a non-repeating atomic group that includes captures,
2086     establish a backup point by processing the rest of the pattern at a lower
2087     level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
2088     original OP_ONCE level, thereby bypassing intermediate backup points, but
2089     resetting any captures that happened along the way. */
2090 
2091     if (*ecode == OP_KET || eptr == saved_eptr)
2092       {
2093       if (*prev == OP_ONCE)
2094         {
2095         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, mb, eptrb, RM12);
2096         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2097         mb->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
2098         RRETURN(MATCH_ONCE);
2099         }
2100       ecode += 1 + LINK_SIZE;    /* Carry on at this level */
2101       break;
2102       }
2103 
2104     /* The normal repeating kets try the rest of the pattern or restart from
2105     the preceding bracket, in the appropriate order. In the second case, we can
2106     use tail recursion to avoid using another stack frame, unless we have an
2107     atomic group or an unlimited repeat of a group that can match an empty
2108     string. */
2109 
2110     if (*ecode == OP_KETRMIN)
2111       {
2112       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, mb, eptrb, RM7);
2113       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2114       if (*prev == OP_ONCE)
2115         {
2116         RMATCH(eptr, prev, offset_top, mb, eptrb, RM8);
2117         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2118         mb->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
2119         RRETURN(MATCH_ONCE);
2120         }
2121       if (*prev >= OP_SBRA)    /* Could match an empty string */
2122         {
2123         RMATCH(eptr, prev, offset_top, mb, eptrb, RM50);
2124         RRETURN(rrc);
2125         }
2126       ecode = prev;
2127       goto TAIL_RECURSE;
2128       }
2129     else  /* OP_KETRMAX */
2130       {
2131       RMATCH(eptr, prev, offset_top, mb, eptrb, RM13);
2132       if (rrc == MATCH_ONCE && mb->once_target == prev) rrc = MATCH_NOMATCH;
2133       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2134       if (*prev == OP_ONCE)
2135         {
2136         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, mb, eptrb, RM9);
2137         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2138         mb->once_target = prev;
2139         RRETURN(MATCH_ONCE);
2140         }
2141       ecode += 1 + LINK_SIZE;
2142       goto TAIL_RECURSE;
2143       }
2144     /* Control never gets here */
2145 
2146     /* Not multiline mode: start of subject assertion, unless notbol. */
2147 
2148     case OP_CIRC:
2149     if ((mb->moptions & PCRE2_NOTBOL) != 0 && eptr == mb->start_subject)
2150       RRETURN(MATCH_NOMATCH);
2151 
2152     /* Start of subject assertion */
2153 
2154     case OP_SOD:
2155     if (eptr != mb->start_subject) RRETURN(MATCH_NOMATCH);
2156     ecode++;
2157     break;
2158 
2159     /* Multiline mode: start of subject unless notbol, or after any newline
2160     except for one at the very end, unless PCRE2_ALT_CIRCUMFLEX is set. */
2161 
2162     case OP_CIRCM:
2163     if ((mb->moptions & PCRE2_NOTBOL) != 0 && eptr == mb->start_subject)
2164       RRETURN(MATCH_NOMATCH);
2165     if (eptr != mb->start_subject &&
2166         ((eptr == mb->end_subject &&
2167            (mb->poptions & PCRE2_ALT_CIRCUMFLEX) == 0) ||
2168          !WAS_NEWLINE(eptr)))
2169       RRETURN(MATCH_NOMATCH);
2170     ecode++;
2171     break;
2172 
2173     /* Start of match assertion */
2174 
2175     case OP_SOM:
2176     if (eptr != mb->start_subject + mb->start_offset) RRETURN(MATCH_NOMATCH);
2177     ecode++;
2178     break;
2179 
2180     /* Reset the start of match point */
2181 
2182     case OP_SET_SOM:
2183     mstart = eptr;
2184     ecode++;
2185     break;
2186 
2187     /* Multiline mode: assert before any newline, or before end of subject
2188     unless noteol is set. */
2189 
2190     case OP_DOLLM:
2191     if (eptr < mb->end_subject)
2192       {
2193       if (!IS_NEWLINE(eptr))
2194         {
2195         if (mb->partial != 0 &&
2196             eptr + 1 >= mb->end_subject &&
2197             NLBLOCK->nltype == NLTYPE_FIXED &&
2198             NLBLOCK->nllen == 2 &&
2199             UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2200           {
2201           mb->hitend = TRUE;
2202           if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
2203           }
2204         RRETURN(MATCH_NOMATCH);
2205         }
2206       }
2207     else
2208       {
2209       if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
2210       SCHECK_PARTIAL();
2211       }
2212     ecode++;
2213     break;
2214 
2215     /* Not multiline mode: assert before a terminating newline or before end of
2216     subject unless noteol is set. */
2217 
2218     case OP_DOLL:
2219     if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
2220     if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
2221 
2222     /* ... else fall through for endonly */
2223 
2224     /* End of subject assertion (\z) */
2225 
2226     case OP_EOD:
2227     if (eptr < mb->end_subject) RRETURN(MATCH_NOMATCH);
2228     SCHECK_PARTIAL();
2229     ecode++;
2230     break;
2231 
2232     /* End of subject or ending \n assertion (\Z) */
2233 
2234     case OP_EODN:
2235     ASSERT_NL_OR_EOS:
2236     if (eptr < mb->end_subject &&
2237         (!IS_NEWLINE(eptr) || eptr != mb->end_subject - mb->nllen))
2238       {
2239       if (mb->partial != 0 &&
2240           eptr + 1 >= mb->end_subject &&
2241           NLBLOCK->nltype == NLTYPE_FIXED &&
2242           NLBLOCK->nllen == 2 &&
2243           UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2244         {
2245         mb->hitend = TRUE;
2246         if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
2247         }
2248       RRETURN(MATCH_NOMATCH);
2249       }
2250 
2251     /* Either at end of string or \n before end. */
2252 
2253     SCHECK_PARTIAL();
2254     ecode++;
2255     break;
2256 
2257     /* Word boundary assertions */
2258 
2259     case OP_NOT_WORD_BOUNDARY:
2260     case OP_WORD_BOUNDARY:
2261       {
2262 
2263       /* Find out if the previous and current characters are "word" characters.
2264       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
2265       be "non-word" characters. Remember the earliest consulted character for
2266       partial matching. */
2267 
2268 #ifdef SUPPORT_UNICODE
2269       if (utf)
2270         {
2271         /* Get status of previous character */
2272 
2273         if (eptr == mb->start_subject) prev_is_word = FALSE; else
2274           {
2275           PCRE2_SPTR lastptr = eptr - 1;
2276           BACKCHAR(lastptr);
2277           if (lastptr < mb->start_used_ptr) mb->start_used_ptr = lastptr;
2278           GETCHAR(c, lastptr);
2279           if ((mb->poptions & PCRE2_UCP) != 0)
2280             {
2281             if (c == '_') prev_is_word = TRUE; else
2282               {
2283               int cat = UCD_CATEGORY(c);
2284               prev_is_word = (cat == ucp_L || cat == ucp_N);
2285               }
2286             }
2287           else
2288           prev_is_word = c < 256 && (mb->ctypes[c] & ctype_word) != 0;
2289           }
2290 
2291         /* Get status of next character */
2292 
2293         if (eptr >= mb->end_subject)
2294           {
2295           SCHECK_PARTIAL();
2296           cur_is_word = FALSE;
2297           }
2298         else
2299           {
2300           PCRE2_SPTR nextptr = eptr + 1;
2301           FORWARDCHARTEST(nextptr, mb->end_subject);
2302           if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
2303           GETCHAR(c, eptr);
2304           if ((mb->poptions & PCRE2_UCP) != 0)
2305             {
2306             if (c == '_') cur_is_word = TRUE; else
2307               {
2308               int cat = UCD_CATEGORY(c);
2309               cur_is_word = (cat == ucp_L || cat == ucp_N);
2310               }
2311             }
2312           else
2313           cur_is_word = c < 256 && (mb->ctypes[c] & ctype_word) != 0;
2314           }
2315         }
2316       else
2317 #endif  /* SUPPORT_UNICODE */
2318 
2319       /* Not in UTF-8 mode, but we may still have PCRE2_UCP set, and for
2320       consistency with the behaviour of \w we do use it in this case. */
2321 
2322         {
2323         /* Get status of previous character */
2324 
2325         if (eptr == mb->start_subject) prev_is_word = FALSE; else
2326           {
2327           if (eptr <= mb->start_used_ptr) mb->start_used_ptr = eptr - 1;
2328 #ifdef SUPPORT_UNICODE
2329           if ((mb->poptions & PCRE2_UCP) != 0)
2330             {
2331             c = eptr[-1];
2332             if (c == '_') prev_is_word = TRUE; else
2333               {
2334               int cat = UCD_CATEGORY(c);
2335               prev_is_word = (cat == ucp_L || cat == ucp_N);
2336               }
2337             }
2338           else
2339 #endif
2340           prev_is_word = MAX_255(eptr[-1])
2341             && ((mb->ctypes[eptr[-1]] & ctype_word) != 0);
2342           }
2343 
2344         /* Get status of next character */
2345 
2346         if (eptr >= mb->end_subject)
2347           {
2348           SCHECK_PARTIAL();
2349           cur_is_word = FALSE;
2350           }
2351         else
2352           {
2353           if (eptr >= mb->last_used_ptr) mb->last_used_ptr = eptr + 1;
2354 #ifdef SUPPORT_UNICODE
2355           if ((mb->poptions & PCRE2_UCP) != 0)
2356             {
2357             c = *eptr;
2358             if (c == '_') cur_is_word = TRUE; else
2359               {
2360               int cat = UCD_CATEGORY(c);
2361               cur_is_word = (cat == ucp_L || cat == ucp_N);
2362               }
2363             }
2364           else
2365 #endif
2366           cur_is_word = MAX_255(*eptr)
2367             && ((mb->ctypes[*eptr] & ctype_word) != 0);
2368           }
2369         }
2370 
2371       /* Now see if the situation is what we want */
2372 
2373       if ((*ecode++ == OP_WORD_BOUNDARY)?
2374            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2375         RRETURN(MATCH_NOMATCH);
2376       }
2377     break;
2378 
2379     /* Match any single character type except newline; have to take care with
2380     CRLF newlines and partial matching. */
2381 
2382     case OP_ANY:
2383     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2384     if (mb->partial != 0 &&
2385         eptr + 1 >= mb->end_subject &&
2386         NLBLOCK->nltype == NLTYPE_FIXED &&
2387         NLBLOCK->nllen == 2 &&
2388         UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2389       {
2390       mb->hitend = TRUE;
2391       if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
2392       }
2393 
2394     /* Fall through */
2395 
2396     /* Match any single character whatsoever. */
2397 
2398     case OP_ALLANY:
2399     if (eptr >= mb->end_subject)   /* DO NOT merge the eptr++ here; it must */
2400       {                            /* not be updated before SCHECK_PARTIAL. */
2401       SCHECK_PARTIAL();
2402       RRETURN(MATCH_NOMATCH);
2403       }
2404     eptr++;
2405 #ifdef SUPPORT_UNICODE
2406     if (utf) ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
2407 #endif
2408     ecode++;
2409     break;
2410 
2411     /* Match a single code unit, even in UTF-8 mode. This opcode really does
2412     match any code unit, even newline. (It really should be called ANYCODEUNIT,
2413     of course - the byte name is from pre-16 bit days.) */
2414 
2415     case OP_ANYBYTE:
2416     if (eptr >= mb->end_subject)   /* DO NOT merge the eptr++ here; it must */
2417       {                            /* not be updated before SCHECK_PARTIAL. */
2418       SCHECK_PARTIAL();
2419       RRETURN(MATCH_NOMATCH);
2420       }
2421     eptr++;
2422     ecode++;
2423     break;
2424 
2425     case OP_NOT_DIGIT:
2426     if (eptr >= mb->end_subject)
2427       {
2428       SCHECK_PARTIAL();
2429       RRETURN(MATCH_NOMATCH);
2430       }
2431     GETCHARINCTEST(c, eptr);
2432     if (
2433 #ifdef SUPPORT_WIDE_CHARS
2434        c < 256 &&
2435 #endif
2436        (mb->ctypes[c] & ctype_digit) != 0
2437        )
2438       RRETURN(MATCH_NOMATCH);
2439     ecode++;
2440     break;
2441 
2442     case OP_DIGIT:
2443     if (eptr >= mb->end_subject)
2444       {
2445       SCHECK_PARTIAL();
2446       RRETURN(MATCH_NOMATCH);
2447       }
2448     GETCHARINCTEST(c, eptr);
2449     if (
2450 #ifdef SUPPORT_WIDE_CHARS
2451        c > 255 ||
2452 #endif
2453        (mb->ctypes[c] & ctype_digit) == 0
2454        )
2455       RRETURN(MATCH_NOMATCH);
2456     ecode++;
2457     break;
2458 
2459     case OP_NOT_WHITESPACE:
2460     if (eptr >= mb->end_subject)
2461       {
2462       SCHECK_PARTIAL();
2463       RRETURN(MATCH_NOMATCH);
2464       }
2465     GETCHARINCTEST(c, eptr);
2466     if (
2467 #ifdef SUPPORT_WIDE_CHARS
2468        c < 256 &&
2469 #endif
2470        (mb->ctypes[c] & ctype_space) != 0
2471        )
2472       RRETURN(MATCH_NOMATCH);
2473     ecode++;
2474     break;
2475 
2476     case OP_WHITESPACE:
2477     if (eptr >= mb->end_subject)
2478       {
2479       SCHECK_PARTIAL();
2480       RRETURN(MATCH_NOMATCH);
2481       }
2482     GETCHARINCTEST(c, eptr);
2483     if (
2484 #ifdef SUPPORT_WIDE_CHARS
2485        c > 255 ||
2486 #endif
2487        (mb->ctypes[c] & ctype_space) == 0
2488        )
2489       RRETURN(MATCH_NOMATCH);
2490     ecode++;
2491     break;
2492 
2493     case OP_NOT_WORDCHAR:
2494     if (eptr >= mb->end_subject)
2495       {
2496       SCHECK_PARTIAL();
2497       RRETURN(MATCH_NOMATCH);
2498       }
2499     GETCHARINCTEST(c, eptr);
2500     if (
2501 #ifdef SUPPORT_WIDE_CHARS
2502        c < 256 &&
2503 #endif
2504        (mb->ctypes[c] & ctype_word) != 0
2505        )
2506       RRETURN(MATCH_NOMATCH);
2507     ecode++;
2508     break;
2509 
2510     case OP_WORDCHAR:
2511     if (eptr >= mb->end_subject)
2512       {
2513       SCHECK_PARTIAL();
2514       RRETURN(MATCH_NOMATCH);
2515       }
2516     GETCHARINCTEST(c, eptr);
2517     if (
2518 #ifdef SUPPORT_WIDE_CHARS
2519        c > 255 ||
2520 #endif
2521        (mb->ctypes[c] & ctype_word) == 0
2522        )
2523       RRETURN(MATCH_NOMATCH);
2524     ecode++;
2525     break;
2526 
2527     case OP_ANYNL:
2528     if (eptr >= mb->end_subject)
2529       {
2530       SCHECK_PARTIAL();
2531       RRETURN(MATCH_NOMATCH);
2532       }
2533     GETCHARINCTEST(c, eptr);
2534     switch(c)
2535       {
2536       default: RRETURN(MATCH_NOMATCH);
2537 
2538       case CHAR_CR:
2539       if (eptr >= mb->end_subject)
2540         {
2541         SCHECK_PARTIAL();
2542         }
2543       else if (UCHAR21TEST(eptr) == CHAR_LF) eptr++;
2544       break;
2545 
2546       case CHAR_LF:
2547       break;
2548 
2549       case CHAR_VT:
2550       case CHAR_FF:
2551       case CHAR_NEL:
2552 #ifndef EBCDIC
2553       case 0x2028:
2554       case 0x2029:
2555 #endif  /* Not EBCDIC */
2556       if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
2557       break;
2558       }
2559     ecode++;
2560     break;
2561 
2562     case OP_NOT_HSPACE:
2563     if (eptr >= mb->end_subject)
2564       {
2565       SCHECK_PARTIAL();
2566       RRETURN(MATCH_NOMATCH);
2567       }
2568     GETCHARINCTEST(c, eptr);
2569     switch(c)
2570       {
2571       HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2572       default: break;
2573       }
2574     ecode++;
2575     break;
2576 
2577     case OP_HSPACE:
2578     if (eptr >= mb->end_subject)
2579       {
2580       SCHECK_PARTIAL();
2581       RRETURN(MATCH_NOMATCH);
2582       }
2583     GETCHARINCTEST(c, eptr);
2584     switch(c)
2585       {
2586       HSPACE_CASES: break;  /* Byte and multibyte cases */
2587       default: RRETURN(MATCH_NOMATCH);
2588       }
2589     ecode++;
2590     break;
2591 
2592     case OP_NOT_VSPACE:
2593     if (eptr >= mb->end_subject)
2594       {
2595       SCHECK_PARTIAL();
2596       RRETURN(MATCH_NOMATCH);
2597       }
2598     GETCHARINCTEST(c, eptr);
2599     switch(c)
2600       {
2601       VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2602       default: break;
2603       }
2604     ecode++;
2605     break;
2606 
2607     case OP_VSPACE:
2608     if (eptr >= mb->end_subject)
2609       {
2610       SCHECK_PARTIAL();
2611       RRETURN(MATCH_NOMATCH);
2612       }
2613     GETCHARINCTEST(c, eptr);
2614     switch(c)
2615       {
2616       VSPACE_CASES: break;
2617       default: RRETURN(MATCH_NOMATCH);
2618       }
2619     ecode++;
2620     break;
2621 
2622 #ifdef SUPPORT_UNICODE
2623     /* Check the next character by Unicode property. We will get here only
2624     if the support is in the binary; otherwise a compile-time error occurs. */
2625 
2626     case OP_PROP:
2627     case OP_NOTPROP:
2628     if (eptr >= mb->end_subject)
2629       {
2630       SCHECK_PARTIAL();
2631       RRETURN(MATCH_NOMATCH);
2632       }
2633     GETCHARINCTEST(c, eptr);
2634       {
2635       const uint32_t *cp;
2636       const ucd_record *prop = GET_UCD(c);
2637 
2638       switch(ecode[1])
2639         {
2640         case PT_ANY:
2641         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2642         break;
2643 
2644         case PT_LAMP:
2645         if ((prop->chartype == ucp_Lu ||
2646              prop->chartype == ucp_Ll ||
2647              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2648           RRETURN(MATCH_NOMATCH);
2649         break;
2650 
2651         case PT_GC:
2652         if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2653           RRETURN(MATCH_NOMATCH);
2654         break;
2655 
2656         case PT_PC:
2657         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2658           RRETURN(MATCH_NOMATCH);
2659         break;
2660 
2661         case PT_SC:
2662         if ((ecode[2] != prop->script) == (op == OP_PROP))
2663           RRETURN(MATCH_NOMATCH);
2664         break;
2665 
2666         /* These are specials */
2667 
2668         case PT_ALNUM:
2669         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2670              PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2671           RRETURN(MATCH_NOMATCH);
2672         break;
2673 
2674         /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2675         which means that Perl space and POSIX space are now identical. PCRE
2676         was changed at release 8.34. */
2677 
2678         case PT_SPACE:    /* Perl space */
2679         case PT_PXSPACE:  /* POSIX space */
2680         switch(c)
2681           {
2682           HSPACE_CASES:
2683           VSPACE_CASES:
2684           if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2685           break;
2686 
2687           default:
2688           if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
2689             (op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
2690           break;
2691           }
2692         break;
2693 
2694         case PT_WORD:
2695         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2696              PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2697              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2698           RRETURN(MATCH_NOMATCH);
2699         break;
2700 
2701         case PT_CLIST:
2702         cp = PRIV(ucd_caseless_sets) + ecode[2];
2703         for (;;)
2704           {
2705           if (c < *cp)
2706             { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
2707           if (c == *cp++)
2708             { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
2709           }
2710         break;
2711 
2712         case PT_UCNC:
2713         if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
2714              c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
2715              c >= 0xe000) == (op == OP_NOTPROP))
2716           RRETURN(MATCH_NOMATCH);
2717         break;
2718 
2719         /* This should never occur */
2720 
2721         default:
2722         RRETURN(PCRE2_ERROR_INTERNAL);
2723         }
2724 
2725       ecode += 3;
2726       }
2727     break;
2728 
2729     /* Match an extended Unicode sequence. We will get here only if the support
2730     is in the binary; otherwise a compile-time error occurs. */
2731 
2732     case OP_EXTUNI:
2733     if (eptr >= mb->end_subject)
2734       {
2735       SCHECK_PARTIAL();
2736       RRETURN(MATCH_NOMATCH);
2737       }
2738     else
2739       {
2740       int lgb, rgb;
2741       GETCHARINCTEST(c, eptr);
2742       lgb = UCD_GRAPHBREAK(c);
2743       while (eptr < mb->end_subject)
2744         {
2745         int len = 1;
2746         if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2747         rgb = UCD_GRAPHBREAK(c);
2748         if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2749         lgb = rgb;
2750         eptr += len;
2751         }
2752       }
2753     CHECK_PARTIAL();
2754     ecode++;
2755     break;
2756 #endif  /* SUPPORT_UNICODE */
2757 
2758 
2759     /* Match a back reference, possibly repeatedly. Look past the end of the
2760     item to see if there is repeat information following.
2761 
2762     The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
2763     or to a non-duplicated named group. For a duplicated named group, OP_DNREF
2764     and OP_DNREFI are used. In this case we must scan the list of groups to
2765     which the name refers, and use the first one that is set. */
2766 
2767     case OP_DNREF:
2768     case OP_DNREFI:
2769     caseless = op == OP_DNREFI;
2770       {
2771       int count = GET2(ecode, 1+IMM2_SIZE);
2772       PCRE2_SPTR slot = mb->name_table + GET2(ecode, 1) * mb->name_entry_size;
2773       ecode += 1 + 2*IMM2_SIZE;
2774 
2775       /* Initializing 'offset' avoids a compiler warning in the REF_REPEAT
2776       code. */
2777 
2778       offset = 0;
2779       while (count-- > 0)
2780         {
2781         offset = GET2(slot, 0) << 1;
2782         if (offset < offset_top && mb->ovector[offset] != PCRE2_UNSET) break;
2783         slot += mb->name_entry_size;
2784         }
2785       }
2786     goto REF_REPEAT;
2787 
2788     case OP_REF:
2789     case OP_REFI:
2790     caseless = op == OP_REFI;
2791     offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2792     ecode += 1 + IMM2_SIZE;
2793 
2794     /* Set up for repetition, or handle the non-repeated case */
2795 
2796     REF_REPEAT:
2797     switch (*ecode)
2798       {
2799       case OP_CRSTAR:
2800       case OP_CRMINSTAR:
2801       case OP_CRPLUS:
2802       case OP_CRMINPLUS:
2803       case OP_CRQUERY:
2804       case OP_CRMINQUERY:
2805       c = *ecode++ - OP_CRSTAR;
2806       minimize = (c & 1) != 0;
2807       min = rep_min[c];                 /* Pick up values from tables; */
2808       max = rep_max[c];                 /* zero for max => infinity */
2809       if (max == 0) max = INT_MAX;
2810       break;
2811 
2812       case OP_CRRANGE:
2813       case OP_CRMINRANGE:
2814       minimize = (*ecode == OP_CRMINRANGE);
2815       min = GET2(ecode, 1);
2816       max = GET2(ecode, 1 + IMM2_SIZE);
2817       if (max == 0) max = INT_MAX;
2818       ecode += 1 + 2 * IMM2_SIZE;
2819       break;
2820 
2821       default:                  /* No repeat follows */
2822         {
2823         int rc = match_ref(offset, offset_top, eptr, mb, caseless, &length);
2824         if (rc != 0)
2825           {
2826           if (rc > 0) eptr = mb->end_subject;   /* Partial match */
2827           CHECK_PARTIAL();
2828           RRETURN(MATCH_NOMATCH);
2829           }
2830         }
2831       eptr += length;
2832       continue;              /* With the main loop */
2833       }
2834 
2835     /* Handle repeated back references. If a set group has length zero, just
2836     continue with the main loop, because it matches however many times. For an
2837     unset reference, if the minimum is zero, we can also just continue. We can
2838     also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes unset
2839     group behave as a zero-length group. For any other unset cases, carrying
2840     on will result in NOMATCH. */
2841 
2842     if (offset < offset_top && mb->ovector[offset] != PCRE2_UNSET)
2843       {
2844       if (mb->ovector[offset] == mb->ovector[offset + 1]) continue;
2845       }
2846     else  /* Group is not set */
2847       {
2848       if (min == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
2849         continue;
2850       }
2851 
2852     /* First, ensure the minimum number of matches are present. We get back
2853     the length of the reference string explicitly rather than passing the
2854     address of eptr, so that eptr can be a register variable. */
2855 
2856     for (i = 1; i <= min; i++)
2857       {
2858       PCRE2_SIZE slength;
2859       int rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
2860       if (rc != 0)
2861         {
2862         if (rc > 0) eptr = mb->end_subject;   /* Partial match */
2863         CHECK_PARTIAL();
2864         RRETURN(MATCH_NOMATCH);
2865         }
2866       eptr += slength;
2867       }
2868 
2869     /* If min = max, continue at the same level without recursion.
2870     They are not both allowed to be zero. */
2871 
2872     if (min == max) continue;
2873 
2874     /* If minimizing, keep trying and advancing the pointer */
2875 
2876     if (minimize)
2877       {
2878       for (fi = min;; fi++)
2879         {
2880         int rc;
2881         PCRE2_SIZE slength;
2882         RMATCH(eptr, ecode, offset_top, mb, eptrb, RM14);
2883         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2884         if (fi >= max) RRETURN(MATCH_NOMATCH);
2885         rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
2886         if (rc != 0)
2887           {
2888           if (rc > 0) eptr = mb->end_subject;   /* Partial match */
2889           CHECK_PARTIAL();
2890           RRETURN(MATCH_NOMATCH);
2891           }
2892         eptr += slength;
2893         }
2894       /* Control never gets here */
2895       }
2896 
2897     /* If maximizing, find the longest string and work backwards, as long as
2898     the matched lengths for each iteration are the same. */
2899 
2900     else
2901       {
2902       BOOL samelengths = TRUE;
2903       pp = eptr;
2904       length = mb->ovector[offset+1] - mb->ovector[offset];
2905 
2906       for (i = min; i < max; i++)
2907         {
2908         PCRE2_SIZE slength;
2909         int rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
2910 
2911         if (rc != 0)
2912           {
2913           /* Can't use CHECK_PARTIAL because we don't want to update eptr in
2914           the soft partial matching case. */
2915 
2916           if (rc > 0 && mb->partial != 0 &&
2917               mb->end_subject > mb->start_used_ptr)
2918             {
2919             mb->hitend = TRUE;
2920             if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
2921             }
2922           break;
2923           }
2924 
2925         if (slength != length) samelengths = FALSE;
2926         eptr += slength;
2927         }
2928 
2929       /* If the length matched for each repetition is the same as the length of
2930       the captured group, we can easily work backwards. This is the normal
2931       case. However, in caseless UTF-8 mode there are pairs of case-equivalent
2932       characters whose lengths (in terms of code units) differ. However, this
2933       is very rare, so we handle it by re-matching fewer and fewer times. */
2934 
2935       if (samelengths)
2936         {
2937         while (eptr >= pp)
2938           {
2939           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM15);
2940           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2941           eptr -= length;
2942           }
2943         }
2944 
2945       /* The rare case of non-matching lengths. Re-scan the repetition for each
2946       iteration. We know that match_ref() will succeed every time. */
2947 
2948       else
2949         {
2950         max = i;
2951         for (;;)
2952           {
2953           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM68);
2954           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2955           if (eptr == pp) break;  /* Failed after minimal repetition */
2956           eptr = pp;
2957           max--;
2958           for (i = min; i < max; i++)
2959             {
2960             PCRE2_SIZE slength;
2961             (void)match_ref(offset, offset_top, eptr, mb, caseless, &slength);
2962             eptr += slength;
2963             }
2964           }
2965         }
2966 
2967       RRETURN(MATCH_NOMATCH);
2968       }
2969     /* Control never gets here */
2970 
2971     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2972     used when all the characters in the class have values in the range 0-255,
2973     and either the matching is caseful, or the characters are in the range
2974     0-127 when UTF-8 processing is enabled. The only difference between
2975     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2976     encountered.
2977 
2978     First, look past the end of the item to see if there is repeat information
2979     following. Then obey similar code to character type repeats - written out
2980     again for speed. */
2981 
2982     case OP_NCLASS:
2983     case OP_CLASS:
2984       {
2985       /* The data variable is saved across frames, so the byte map needs to
2986       be stored there. */
2987 #define BYTE_MAP ((uint8_t *)data)
2988       data = ecode + 1;                /* Save for matching */
2989       ecode += 1 + (32 / sizeof(PCRE2_UCHAR)); /* Advance past the item */
2990 
2991       switch (*ecode)
2992         {
2993         case OP_CRSTAR:
2994         case OP_CRMINSTAR:
2995         case OP_CRPLUS:
2996         case OP_CRMINPLUS:
2997         case OP_CRQUERY:
2998         case OP_CRMINQUERY:
2999         case OP_CRPOSSTAR:
3000         case OP_CRPOSPLUS:
3001         case OP_CRPOSQUERY:
3002         c = *ecode++ - OP_CRSTAR;
3003         if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
3004         else possessive = TRUE;
3005         min = rep_min[c];                 /* Pick up values from tables; */
3006         max = rep_max[c];                 /* zero for max => infinity */
3007         if (max == 0) max = INT_MAX;
3008         break;
3009 
3010         case OP_CRRANGE:
3011         case OP_CRMINRANGE:
3012         case OP_CRPOSRANGE:
3013         minimize = (*ecode == OP_CRMINRANGE);
3014         possessive = (*ecode == OP_CRPOSRANGE);
3015         min = GET2(ecode, 1);
3016         max = GET2(ecode, 1 + IMM2_SIZE);
3017         if (max == 0) max = INT_MAX;
3018         ecode += 1 + 2 * IMM2_SIZE;
3019         break;
3020 
3021         default:               /* No repeat follows */
3022         min = max = 1;
3023         break;
3024         }
3025 
3026       /* First, ensure the minimum number of matches are present. */
3027 
3028 #ifdef SUPPORT_UNICODE
3029       if (utf)
3030         {
3031         for (i = 1; i <= min; i++)
3032           {
3033           if (eptr >= mb->end_subject)
3034             {
3035             SCHECK_PARTIAL();
3036             RRETURN(MATCH_NOMATCH);
3037             }
3038           GETCHARINC(c, eptr);
3039           if (c > 255)
3040             {
3041             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
3042             }
3043           else
3044             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
3045           }
3046         }
3047       else
3048 #endif
3049       /* Not UTF mode */
3050         {
3051         for (i = 1; i <= min; i++)
3052           {
3053           if (eptr >= mb->end_subject)
3054             {
3055             SCHECK_PARTIAL();
3056             RRETURN(MATCH_NOMATCH);
3057             }
3058           c = *eptr++;
3059 #if PCRE2_CODE_UNIT_WIDTH != 8
3060           if (c > 255)
3061             {
3062             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
3063             }
3064           else
3065 #endif
3066             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
3067           }
3068         }
3069 
3070       /* If max == min we can continue with the main loop without the
3071       need to recurse. */
3072 
3073       if (min == max) continue;
3074 
3075       /* If minimizing, keep testing the rest of the expression and advancing
3076       the pointer while it matches the class. */
3077 
3078       if (minimize)
3079         {
3080 #ifdef SUPPORT_UNICODE
3081         if (utf)
3082           {
3083           for (fi = min;; fi++)
3084             {
3085             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM16);
3086             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3087             if (fi >= max) RRETURN(MATCH_NOMATCH);
3088             if (eptr >= mb->end_subject)
3089               {
3090               SCHECK_PARTIAL();
3091               RRETURN(MATCH_NOMATCH);
3092               }
3093             GETCHARINC(c, eptr);
3094             if (c > 255)
3095               {
3096               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
3097               }
3098             else
3099               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
3100             }
3101           }
3102         else
3103 #endif
3104         /* Not UTF mode */
3105           {
3106           for (fi = min;; fi++)
3107             {
3108             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM17);
3109             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3110             if (fi >= max) RRETURN(MATCH_NOMATCH);
3111             if (eptr >= mb->end_subject)
3112               {
3113               SCHECK_PARTIAL();
3114               RRETURN(MATCH_NOMATCH);
3115               }
3116             c = *eptr++;
3117 #if PCRE2_CODE_UNIT_WIDTH != 8
3118             if (c > 255)
3119               {
3120               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
3121               }
3122             else
3123 #endif
3124               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
3125             }
3126           }
3127         /* Control never gets here */
3128         }
3129 
3130       /* If maximizing, find the longest possible run, then work backwards. */
3131 
3132       else
3133         {
3134         pp = eptr;
3135 
3136 #ifdef SUPPORT_UNICODE
3137         if (utf)
3138           {
3139           for (i = min; i < max; i++)
3140             {
3141             int len = 1;
3142             if (eptr >= mb->end_subject)
3143               {
3144               SCHECK_PARTIAL();
3145               break;
3146               }
3147             GETCHARLEN(c, eptr, len);
3148             if (c > 255)
3149               {
3150               if (op == OP_CLASS) break;
3151               }
3152             else
3153               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3154             eptr += len;
3155             }
3156 
3157           if (possessive) continue;    /* No backtracking */
3158 
3159           for (;;)
3160             {
3161             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM18);
3162             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3163             if (eptr-- == pp) break;        /* Stop if tried at original pos */
3164             BACKCHAR(eptr);
3165             }
3166           }
3167         else
3168 #endif
3169           /* Not UTF mode */
3170           {
3171           for (i = min; i < max; i++)
3172             {
3173             if (eptr >= mb->end_subject)
3174               {
3175               SCHECK_PARTIAL();
3176               break;
3177               }
3178             c = *eptr;
3179 #if PCRE2_CODE_UNIT_WIDTH != 8
3180             if (c > 255)
3181               {
3182               if (op == OP_CLASS) break;
3183               }
3184             else
3185 #endif
3186               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3187             eptr++;
3188             }
3189 
3190           if (possessive) continue;    /* No backtracking */
3191 
3192           while (eptr >= pp)
3193             {
3194             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM19);
3195             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3196             eptr--;
3197             }
3198           }
3199 
3200         RRETURN(MATCH_NOMATCH);
3201         }
3202 #undef BYTE_MAP
3203       }
3204     /* Control never gets here */
3205 
3206 
3207     /* Match an extended character class. In the 8-bit library, this opcode is
3208     encountered only when UTF-8 mode is supported. In the 16-bit and
3209     32-bit libraries, codepoints greater than 255 may be encountered even when
3210     UTF is not supported. */
3211 
3212 #ifdef SUPPORT_WIDE_CHARS
3213     case OP_XCLASS:
3214       {
3215       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
3216       ecode += GET(ecode, 1);                      /* Advance past the item */
3217 
3218       switch (*ecode)
3219         {
3220         case OP_CRSTAR:
3221         case OP_CRMINSTAR:
3222         case OP_CRPLUS:
3223         case OP_CRMINPLUS:
3224         case OP_CRQUERY:
3225         case OP_CRMINQUERY:
3226         case OP_CRPOSSTAR:
3227         case OP_CRPOSPLUS:
3228         case OP_CRPOSQUERY:
3229         c = *ecode++ - OP_CRSTAR;
3230         if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
3231         else possessive = TRUE;
3232         min = rep_min[c];                 /* Pick up values from tables; */
3233         max = rep_max[c];                 /* zero for max => infinity */
3234         if (max == 0) max = INT_MAX;
3235         break;
3236 
3237         case OP_CRRANGE:
3238         case OP_CRMINRANGE:
3239         case OP_CRPOSRANGE:
3240         minimize = (*ecode == OP_CRMINRANGE);
3241         possessive = (*ecode == OP_CRPOSRANGE);
3242         min = GET2(ecode, 1);
3243         max = GET2(ecode, 1 + IMM2_SIZE);
3244         if (max == 0) max = INT_MAX;
3245         ecode += 1 + 2 * IMM2_SIZE;
3246         break;
3247 
3248         default:               /* No repeat follows */
3249         min = max = 1;
3250         break;
3251         }
3252 
3253       /* First, ensure the minimum number of matches are present. */
3254 
3255       for (i = 1; i <= min; i++)
3256         {
3257         if (eptr >= mb->end_subject)
3258           {
3259           SCHECK_PARTIAL();
3260           RRETURN(MATCH_NOMATCH);
3261           }
3262         GETCHARINCTEST(c, eptr);
3263         if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3264         }
3265 
3266       /* If max == min we can continue with the main loop without the
3267       need to recurse. */
3268 
3269       if (min == max) continue;
3270 
3271       /* If minimizing, keep testing the rest of the expression and advancing
3272       the pointer while it matches the class. */
3273 
3274       if (minimize)
3275         {
3276         for (fi = min;; fi++)
3277           {
3278           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM20);
3279           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3280           if (fi >= max) RRETURN(MATCH_NOMATCH);
3281           if (eptr >= mb->end_subject)
3282             {
3283             SCHECK_PARTIAL();
3284             RRETURN(MATCH_NOMATCH);
3285             }
3286           GETCHARINCTEST(c, eptr);
3287           if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3288           }
3289         /* Control never gets here */
3290         }
3291 
3292       /* If maximizing, find the longest possible run, then work backwards. */
3293 
3294       else
3295         {
3296         pp = eptr;
3297         for (i = min; i < max; i++)
3298           {
3299           int len = 1;
3300           if (eptr >= mb->end_subject)
3301             {
3302             SCHECK_PARTIAL();
3303             break;
3304             }
3305 #ifdef SUPPORT_UNICODE
3306           GETCHARLENTEST(c, eptr, len);
3307 #else
3308           c = *eptr;
3309 #endif
3310           if (!PRIV(xclass)(c, data, utf)) break;
3311           eptr += len;
3312           }
3313 
3314         if (possessive) continue;    /* No backtracking */
3315 
3316         for(;;)
3317           {
3318           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM21);
3319           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3320           if (eptr-- == pp) break;        /* Stop if tried at original pos */
3321 #ifdef SUPPORT_UNICODE
3322           if (utf) BACKCHAR(eptr);
3323 #endif
3324           }
3325         RRETURN(MATCH_NOMATCH);
3326         }
3327 
3328       /* Control never gets here */
3329       }
3330 #endif    /* End of XCLASS */
3331 
3332     /* Match a single character, casefully */
3333 
3334     case OP_CHAR:
3335 #ifdef SUPPORT_UNICODE
3336     if (utf)
3337       {
3338       length = 1;
3339       ecode++;
3340       GETCHARLEN(fc, ecode, length);
3341       if (length > (PCRE2_SIZE)(mb->end_subject - eptr))
3342         {
3343         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
3344         RRETURN(MATCH_NOMATCH);
3345         }
3346       for (; length > 0; length--)
3347         {
3348         if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH);
3349         }
3350       }
3351     else
3352 #endif
3353     /* Not UTF mode */
3354       {
3355       if (mb->end_subject - eptr < 1)
3356         {
3357         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
3358         RRETURN(MATCH_NOMATCH);
3359         }
3360       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
3361       ecode += 2;
3362       }
3363     break;
3364 
3365     /* Match a single character, caselessly. If we are at the end of the
3366     subject, give up immediately. */
3367 
3368     case OP_CHARI:
3369     if (eptr >= mb->end_subject)
3370       {
3371       SCHECK_PARTIAL();
3372       RRETURN(MATCH_NOMATCH);
3373       }
3374 
3375 #ifdef SUPPORT_UNICODE
3376     if (utf)
3377       {
3378       length = 1;
3379       ecode++;
3380       GETCHARLEN(fc, ecode, length);
3381 
3382       /* If the pattern character's value is < 128, we have only one byte, and
3383       we know that its other case must also be one byte long, so we can use the
3384       fast lookup table. We know that there is at least one byte left in the
3385       subject. */
3386 
3387       if (fc < 128)
3388         {
3389         uint32_t cc = UCHAR21(eptr);
3390         if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
3391         ecode++;
3392         eptr++;
3393         }
3394 
3395       /* Otherwise we must pick up the subject character. Note that we cannot
3396       use the value of "length" to check for sufficient bytes left, because the
3397       other case of the character may have more or fewer bytes.  */
3398 
3399       else
3400         {
3401         uint32_t dc;
3402         GETCHARINC(dc, eptr);
3403         ecode += length;
3404 
3405         /* If we have Unicode property support, we can use it to test the other
3406         case of the character, if there is one. */
3407 
3408         if (fc != dc)
3409           {
3410 #ifdef SUPPORT_UNICODE
3411           if (dc != UCD_OTHERCASE(fc))
3412 #endif
3413             RRETURN(MATCH_NOMATCH);
3414           }
3415         }
3416       }
3417     else
3418 #endif   /* SUPPORT_UNICODE */
3419 
3420     /* Not UTF mode */
3421       {
3422       if (TABLE_GET(ecode[1], mb->lcc, ecode[1])
3423           != TABLE_GET(*eptr, mb->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3424       eptr++;
3425       ecode += 2;
3426       }
3427     break;
3428 
3429     /* Match a single character repeatedly. */
3430 
3431     case OP_EXACT:
3432     case OP_EXACTI:
3433     min = max = GET2(ecode, 1);
3434     ecode += 1 + IMM2_SIZE;
3435     goto REPEATCHAR;
3436 
3437     case OP_POSUPTO:
3438     case OP_POSUPTOI:
3439     possessive = TRUE;
3440     /* Fall through */
3441 
3442     case OP_UPTO:
3443     case OP_UPTOI:
3444     case OP_MINUPTO:
3445     case OP_MINUPTOI:
3446     min = 0;
3447     max = GET2(ecode, 1);
3448     minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3449     ecode += 1 + IMM2_SIZE;
3450     goto REPEATCHAR;
3451 
3452     case OP_POSSTAR:
3453     case OP_POSSTARI:
3454     possessive = TRUE;
3455     min = 0;
3456     max = INT_MAX;
3457     ecode++;
3458     goto REPEATCHAR;
3459 
3460     case OP_POSPLUS:
3461     case OP_POSPLUSI:
3462     possessive = TRUE;
3463     min = 1;
3464     max = INT_MAX;
3465     ecode++;
3466     goto REPEATCHAR;
3467 
3468     case OP_POSQUERY:
3469     case OP_POSQUERYI:
3470     possessive = TRUE;
3471     min = 0;
3472     max = 1;
3473     ecode++;
3474     goto REPEATCHAR;
3475 
3476     case OP_STAR:
3477     case OP_STARI:
3478     case OP_MINSTAR:
3479     case OP_MINSTARI:
3480     case OP_PLUS:
3481     case OP_PLUSI:
3482     case OP_MINPLUS:
3483     case OP_MINPLUSI:
3484     case OP_QUERY:
3485     case OP_QUERYI:
3486     case OP_MINQUERY:
3487     case OP_MINQUERYI:
3488     c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
3489     minimize = (c & 1) != 0;
3490     min = rep_min[c];                 /* Pick up values from tables; */
3491     max = rep_max[c];                 /* zero for max => infinity */
3492     if (max == 0) max = INT_MAX;
3493 
3494     /* Common code for all repeated single-character matches. We first check
3495     for the minimum number of characters. If the minimum equals the maximum, we
3496     are done. Otherwise, if minimizing, check the rest of the pattern for a
3497     match; if there isn't one, advance up to the maximum, one character at a
3498     time.
3499 
3500     If maximizing, advance up to the maximum number of matching characters,
3501     until eptr is past the end of the maximum run. If possessive, we are
3502     then done (no backing up). Otherwise, match at this position; anything
3503     other than no match is immediately returned. For nomatch, back up one
3504     character, unless we are matching \R and the last thing matched was
3505     \r\n, in which case, back up two bytes. When we reach the first optional
3506     character position, we can save stack by doing a tail recurse.
3507 
3508     The various UTF/non-UTF and caseful/caseless cases are handled separately,
3509     for speed. */
3510 
3511     REPEATCHAR:
3512 #ifdef SUPPORT_UNICODE
3513     if (utf)
3514       {
3515       length = 1;
3516       charptr = ecode;
3517       GETCHARLEN(fc, ecode, length);
3518       ecode += length;
3519 
3520       /* Handle multibyte character matching specially here. There is
3521       support for caseless matching if UCP support is present. */
3522 
3523       if (length > 1)
3524         {
3525         uint32_t othercase;
3526         if (op >= OP_STARI &&     /* Caseless */
3527             (othercase = UCD_OTHERCASE(fc)) != fc)
3528           oclength = PRIV(ord2utf)(othercase, occhars);
3529         else oclength = 0;
3530 
3531         for (i = 1; i <= min; i++)
3532           {
3533           if (eptr <= mb->end_subject - length &&
3534             memcmp(eptr, charptr, CU2BYTES(length)) == 0) eptr += length;
3535           else if (oclength > 0 &&
3536                    eptr <= mb->end_subject - oclength &&
3537                    memcmp(eptr, occhars, CU2BYTES(oclength)) == 0) eptr += oclength;
3538           else
3539             {
3540             CHECK_PARTIAL();
3541             RRETURN(MATCH_NOMATCH);
3542             }
3543           }
3544 
3545         if (min == max) continue;
3546 
3547         if (minimize)
3548           {
3549           for (fi = min;; fi++)
3550             {
3551             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM22);
3552             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3553             if (fi >= max) RRETURN(MATCH_NOMATCH);
3554             if (eptr <= mb->end_subject - length &&
3555               memcmp(eptr, charptr, CU2BYTES(length)) == 0) eptr += length;
3556             else if (oclength > 0 &&
3557                      eptr <= mb->end_subject - oclength &&
3558                      memcmp(eptr, occhars, CU2BYTES(oclength)) == 0) eptr += oclength;
3559             else
3560               {
3561               CHECK_PARTIAL();
3562               RRETURN(MATCH_NOMATCH);
3563               }
3564             }
3565           /* Control never gets here */
3566           }
3567 
3568         else  /* Maximize */
3569           {
3570           pp = eptr;
3571           for (i = min; i < max; i++)
3572             {
3573             if (eptr <= mb->end_subject - length &&
3574                 memcmp(eptr, charptr, CU2BYTES(length)) == 0) eptr += length;
3575             else if (oclength > 0 &&
3576                      eptr <= mb->end_subject - oclength &&
3577                      memcmp(eptr, occhars, CU2BYTES(oclength)) == 0) eptr += oclength;
3578             else
3579               {
3580               CHECK_PARTIAL();
3581               break;
3582               }
3583             }
3584 
3585           if (possessive) continue;    /* No backtracking */
3586 
3587           /* After \C in UTF mode, pp might be in the middle of a Unicode
3588           character. Use <= pp to ensure backtracking doesn't go too far. */
3589 
3590           for(;;)
3591             {
3592             if (eptr <= pp) goto TAIL_RECURSE;
3593             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM23);
3594             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3595             eptr--;
3596             BACKCHAR(eptr);
3597             }
3598           }
3599         /* Control never gets here */
3600         }
3601 
3602       /* If the length of a UTF-8 character is 1, we fall through here, and
3603       obey the code as for non-UTF-8 characters below, though in this case the
3604       value of fc will always be < 128. */
3605       }
3606     else
3607 #endif  /* SUPPORT_UNICODE */
3608 
3609       /* When not in UTF-8 mode, load a single-byte character. */
3610       fc = *ecode++;
3611 
3612     /* The value of fc at this point is always one character, though we may
3613     or may not be in UTF mode. The code is duplicated for the caseless and
3614     caseful cases, for speed, since matching characters is likely to be quite
3615     common. First, ensure the minimum number of matches are present. If min =
3616     max, continue at the same level without recursing. Otherwise, if
3617     minimizing, keep trying the rest of the expression and advancing one
3618     matching character if failing, up to the maximum. Alternatively, if
3619     maximizing, find the maximum number of characters and work backwards. */
3620 
3621     if (op >= OP_STARI)  /* Caseless */
3622       {
3623 #if PCRE2_CODE_UNIT_WIDTH == 8
3624       /* fc must be < 128 if UTF is enabled. */
3625       foc = mb->fcc[fc];
3626 #else
3627 #ifdef SUPPORT_UNICODE
3628       if (utf && fc > 127)
3629         foc = UCD_OTHERCASE(fc);
3630       else
3631 #endif /* SUPPORT_UNICODE */
3632         foc = TABLE_GET(fc, mb->fcc, fc);
3633 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
3634 
3635       for (i = 1; i <= min; i++)
3636         {
3637         uint32_t cc;                 /* Faster than PCRE2_UCHAR */
3638         if (eptr >= mb->end_subject)
3639           {
3640           SCHECK_PARTIAL();
3641           RRETURN(MATCH_NOMATCH);
3642           }
3643         cc = UCHAR21TEST(eptr);
3644         if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3645         eptr++;
3646         }
3647       if (min == max) continue;
3648       if (minimize)
3649         {
3650         for (fi = min;; fi++)
3651           {
3652           uint32_t cc;               /* Faster than PCRE2_UCHAR */
3653           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM24);
3654           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3655           if (fi >= max) RRETURN(MATCH_NOMATCH);
3656           if (eptr >= mb->end_subject)
3657             {
3658             SCHECK_PARTIAL();
3659             RRETURN(MATCH_NOMATCH);
3660             }
3661           cc = UCHAR21TEST(eptr);
3662           if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3663           eptr++;
3664           }
3665         /* Control never gets here */
3666         }
3667       else  /* Maximize */
3668         {
3669         pp = eptr;
3670         for (i = min; i < max; i++)
3671           {
3672           uint32_t cc;               /* Faster than PCRE2_UCHAR */
3673           if (eptr >= mb->end_subject)
3674             {
3675             SCHECK_PARTIAL();
3676             break;
3677             }
3678           cc = UCHAR21TEST(eptr);
3679           if (fc != cc && foc != cc) break;
3680           eptr++;
3681           }
3682         if (possessive) continue;       /* No backtracking */
3683         for (;;)
3684           {
3685           if (eptr == pp) goto TAIL_RECURSE;
3686           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM25);
3687           eptr--;
3688           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3689           }
3690         /* Control never gets here */
3691         }
3692       }
3693 
3694     /* Caseful comparisons (includes all multi-byte characters) */
3695 
3696     else
3697       {
3698       for (i = 1; i <= min; i++)
3699         {
3700         if (eptr >= mb->end_subject)
3701           {
3702           SCHECK_PARTIAL();
3703           RRETURN(MATCH_NOMATCH);
3704           }
3705         if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3706         }
3707 
3708       if (min == max) continue;
3709 
3710       if (minimize)
3711         {
3712         for (fi = min;; fi++)
3713           {
3714           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM26);
3715           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3716           if (fi >= max) RRETURN(MATCH_NOMATCH);
3717           if (eptr >= mb->end_subject)
3718             {
3719             SCHECK_PARTIAL();
3720             RRETURN(MATCH_NOMATCH);
3721             }
3722           if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3723           }
3724         /* Control never gets here */
3725         }
3726       else  /* Maximize */
3727         {
3728         pp = eptr;
3729         for (i = min; i < max; i++)
3730           {
3731           if (eptr >= mb->end_subject)
3732             {
3733             SCHECK_PARTIAL();
3734             break;
3735             }
3736           if (fc != UCHAR21TEST(eptr)) break;
3737           eptr++;
3738           }
3739         if (possessive) continue;    /* No backtracking */
3740         for (;;)
3741           {
3742           if (eptr == pp) goto TAIL_RECURSE;
3743           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM27);
3744           eptr--;
3745           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3746           }
3747         /* Control never gets here */
3748         }
3749       }
3750     /* Control never gets here */
3751 
3752     /* Match a negated single character. In UTF mode both the pattern
3753     character and the subject character can be multibyte. */
3754 
3755     case OP_NOT:
3756     case OP_NOTI:
3757     if (eptr >= mb->end_subject)
3758       {
3759       SCHECK_PARTIAL();
3760       RRETURN(MATCH_NOMATCH);
3761       }
3762 #ifdef SUPPORT_UNICODE
3763     if (utf)
3764       {
3765       register uint32_t ch, och;
3766 
3767       ecode++;
3768       GETCHARINC(ch, ecode);
3769       GETCHARINC(c, eptr);
3770 
3771       if (op == OP_NOT)
3772         {
3773         if (ch == c) RRETURN(MATCH_NOMATCH);
3774         }
3775       else
3776         {
3777         if (ch > 127)
3778           och = UCD_OTHERCASE(ch);
3779         else
3780           och = TABLE_GET(ch, mb->fcc, ch);
3781         if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3782         }
3783       }
3784     else
3785 #endif  /* SUPPORT_UNICODE */
3786       {
3787       register uint32_t ch = ecode[1];
3788       c = *eptr++;
3789       if (ch == c || (op == OP_NOTI && TABLE_GET(ch, mb->fcc, ch) == c))
3790         RRETURN(MATCH_NOMATCH);
3791       ecode += 2;
3792       }
3793     break;
3794 
3795     /* Match a negated single character repeatedly. This is almost a
3796     repeat of the code for a repeated single character, but I haven't found a
3797     nice way of commoning these up that doesn't require a test of the
3798     positive/negative option for each character match. Maybe that wouldn't add
3799     very much to the time taken, but character matching *is* what this is all
3800     about... */
3801 
3802     case OP_NOTEXACT:
3803     case OP_NOTEXACTI:
3804     min = max = GET2(ecode, 1);
3805     ecode += 1 + IMM2_SIZE;
3806     goto REPEATNOTCHAR;
3807 
3808     case OP_NOTUPTO:
3809     case OP_NOTUPTOI:
3810     case OP_NOTMINUPTO:
3811     case OP_NOTMINUPTOI:
3812     min = 0;
3813     max = GET2(ecode, 1);
3814     minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3815     ecode += 1 + IMM2_SIZE;
3816     goto REPEATNOTCHAR;
3817 
3818     case OP_NOTPOSSTAR:
3819     case OP_NOTPOSSTARI:
3820     possessive = TRUE;
3821     min = 0;
3822     max = INT_MAX;
3823     ecode++;
3824     goto REPEATNOTCHAR;
3825 
3826     case OP_NOTPOSPLUS:
3827     case OP_NOTPOSPLUSI:
3828     possessive = TRUE;
3829     min = 1;
3830     max = INT_MAX;
3831     ecode++;
3832     goto REPEATNOTCHAR;
3833 
3834     case OP_NOTPOSQUERY:
3835     case OP_NOTPOSQUERYI:
3836     possessive = TRUE;
3837     min = 0;
3838     max = 1;
3839     ecode++;
3840     goto REPEATNOTCHAR;
3841 
3842     case OP_NOTPOSUPTO:
3843     case OP_NOTPOSUPTOI:
3844     possessive = TRUE;
3845     min = 0;
3846     max = GET2(ecode, 1);
3847     ecode += 1 + IMM2_SIZE;
3848     goto REPEATNOTCHAR;
3849 
3850     case OP_NOTSTAR:
3851     case OP_NOTSTARI:
3852     case OP_NOTMINSTAR:
3853     case OP_NOTMINSTARI:
3854     case OP_NOTPLUS:
3855     case OP_NOTPLUSI:
3856     case OP_NOTMINPLUS:
3857     case OP_NOTMINPLUSI:
3858     case OP_NOTQUERY:
3859     case OP_NOTQUERYI:
3860     case OP_NOTMINQUERY:
3861     case OP_NOTMINQUERYI:
3862     c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
3863     minimize = (c & 1) != 0;
3864     min = rep_min[c];                 /* Pick up values from tables; */
3865     max = rep_max[c];                 /* zero for max => infinity */
3866     if (max == 0) max = INT_MAX;
3867 
3868     /* Common code for all repeated negated single-character matches. */
3869 
3870     REPEATNOTCHAR:
3871     GETCHARINCTEST(fc, ecode);
3872 
3873     /* The code is duplicated for the caseless and caseful cases, for speed,
3874     since matching characters is likely to be quite common. First, ensure the
3875     minimum number of matches are present. If min = max, continue at the same
3876     level without recursing. Otherwise, if minimizing, keep trying the rest of
3877     the expression and advancing one matching character if failing, up to the
3878     maximum. Alternatively, if maximizing, find the maximum number of
3879     characters and work backwards. */
3880 
3881     if (op >= OP_NOTSTARI)     /* Caseless */
3882       {
3883 #ifdef SUPPORT_UNICODE
3884       if (utf && fc > 127)
3885         foc = UCD_OTHERCASE(fc);
3886       else
3887 #endif /* SUPPORT_UNICODE */
3888         foc = TABLE_GET(fc, mb->fcc, fc);
3889 
3890 #ifdef SUPPORT_UNICODE
3891       if (utf)
3892         {
3893         register uint32_t d;
3894         for (i = 1; i <= min; i++)
3895           {
3896           if (eptr >= mb->end_subject)
3897             {
3898             SCHECK_PARTIAL();
3899             RRETURN(MATCH_NOMATCH);
3900             }
3901           GETCHARINC(d, eptr);
3902           if (fc == d || (uint32_t)foc == d) RRETURN(MATCH_NOMATCH);
3903           }
3904         }
3905       else
3906 #endif  /* SUPPORT_UNICODE */
3907       /* Not UTF mode */
3908         {
3909         for (i = 1; i <= min; i++)
3910           {
3911           if (eptr >= mb->end_subject)
3912             {
3913             SCHECK_PARTIAL();
3914             RRETURN(MATCH_NOMATCH);
3915             }
3916           if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3917           eptr++;
3918           }
3919         }
3920 
3921       if (min == max) continue;
3922 
3923       if (minimize)
3924         {
3925 #ifdef SUPPORT_UNICODE
3926         if (utf)
3927           {
3928           register uint32_t d;
3929           for (fi = min;; fi++)
3930             {
3931             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM28);
3932             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3933             if (fi >= max) RRETURN(MATCH_NOMATCH);
3934             if (eptr >= mb->end_subject)
3935               {
3936               SCHECK_PARTIAL();
3937               RRETURN(MATCH_NOMATCH);
3938               }
3939             GETCHARINC(d, eptr);
3940             if (fc == d || (uint32_t)foc == d) RRETURN(MATCH_NOMATCH);
3941             }
3942           }
3943         else
3944 #endif  /*SUPPORT_UNICODE */
3945         /* Not UTF mode */
3946           {
3947           for (fi = min;; fi++)
3948             {
3949             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM29);
3950             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3951             if (fi >= max) RRETURN(MATCH_NOMATCH);
3952             if (eptr >= mb->end_subject)
3953               {
3954               SCHECK_PARTIAL();
3955               RRETURN(MATCH_NOMATCH);
3956               }
3957             if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3958             eptr++;
3959             }
3960           }
3961         /* Control never gets here */
3962         }
3963 
3964       /* Maximize case */
3965 
3966       else
3967         {
3968         pp = eptr;
3969 
3970 #ifdef SUPPORT_UNICODE
3971         if (utf)
3972           {
3973           register uint32_t d;
3974           for (i = min; i < max; i++)
3975             {
3976             int len = 1;
3977             if (eptr >= mb->end_subject)
3978               {
3979               SCHECK_PARTIAL();
3980               break;
3981               }
3982             GETCHARLEN(d, eptr, len);
3983             if (fc == d || (uint32_t)foc == d) break;
3984             eptr += len;
3985             }
3986           if (possessive) continue;    /* No backtracking */
3987 
3988           /* After \C in UTF mode, pp might be in the middle of a Unicode
3989           character. Use <= pp to ensure backtracking doesn't go too far. */
3990 
3991           for(;;)
3992             {
3993             if (eptr <= pp) goto TAIL_RECURSE;
3994             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM30);
3995             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3996             eptr--;
3997             BACKCHAR(eptr);
3998             }
3999           }
4000         else
4001 #endif  /* SUPPORT_UNICODE */
4002         /* Not UTF mode */
4003           {
4004           for (i = min; i < max; i++)
4005             {
4006             if (eptr >= mb->end_subject)
4007               {
4008               SCHECK_PARTIAL();
4009               break;
4010               }
4011             if (fc == *eptr || foc == *eptr) break;
4012             eptr++;
4013             }
4014           if (possessive) continue;    /* No backtracking */
4015           for (;;)
4016             {
4017             if (eptr == pp) goto TAIL_RECURSE;
4018             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM31);
4019             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4020             eptr--;
4021             }
4022           }
4023         /* Control never gets here */
4024         }
4025       }
4026 
4027     /* Caseful comparisons */
4028 
4029     else
4030       {
4031 #ifdef SUPPORT_UNICODE
4032       if (utf)
4033         {
4034         register uint32_t d;
4035         for (i = 1; i <= min; i++)
4036           {
4037           if (eptr >= mb->end_subject)
4038             {
4039             SCHECK_PARTIAL();
4040             RRETURN(MATCH_NOMATCH);
4041             }
4042           GETCHARINC(d, eptr);
4043           if (fc == d) RRETURN(MATCH_NOMATCH);
4044           }
4045         }
4046       else
4047 #endif
4048       /* Not UTF mode */
4049         {
4050         for (i = 1; i <= min; i++)
4051           {
4052           if (eptr >= mb->end_subject)
4053             {
4054             SCHECK_PARTIAL();
4055             RRETURN(MATCH_NOMATCH);
4056             }
4057           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
4058           }
4059         }
4060 
4061       if (min == max) continue;
4062 
4063       if (minimize)
4064         {
4065 #ifdef SUPPORT_UNICODE
4066         if (utf)
4067           {
4068           register uint32_t d;
4069           for (fi = min;; fi++)
4070             {
4071             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM32);
4072             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4073             if (fi >= max) RRETURN(MATCH_NOMATCH);
4074             if (eptr >= mb->end_subject)
4075               {
4076               SCHECK_PARTIAL();
4077               RRETURN(MATCH_NOMATCH);
4078               }
4079             GETCHARINC(d, eptr);
4080             if (fc == d) RRETURN(MATCH_NOMATCH);
4081             }
4082           }
4083         else
4084 #endif
4085         /* Not UTF mode */
4086           {
4087           for (fi = min;; fi++)
4088             {
4089             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM33);
4090             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4091             if (fi >= max) RRETURN(MATCH_NOMATCH);
4092             if (eptr >= mb->end_subject)
4093               {
4094               SCHECK_PARTIAL();
4095               RRETURN(MATCH_NOMATCH);
4096               }
4097             if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
4098             }
4099           }
4100         /* Control never gets here */
4101         }
4102 
4103       /* Maximize case */
4104 
4105       else
4106         {
4107         pp = eptr;
4108 
4109 #ifdef SUPPORT_UNICODE
4110         if (utf)
4111           {
4112           register uint32_t d;
4113           for (i = min; i < max; i++)
4114             {
4115             int len = 1;
4116             if (eptr >= mb->end_subject)
4117               {
4118               SCHECK_PARTIAL();
4119               break;
4120               }
4121             GETCHARLEN(d, eptr, len);
4122             if (fc == d) break;
4123             eptr += len;
4124             }
4125           if (possessive) continue;    /* No backtracking */
4126 
4127           /* After \C in UTF mode, pp might be in the middle of a Unicode
4128           character. Use <= pp to ensure backtracking doesn't go too far. */
4129 
4130           for(;;)
4131             {
4132             if (eptr <= pp) goto TAIL_RECURSE;
4133             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM34);
4134             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4135             eptr--;
4136             BACKCHAR(eptr);
4137             }
4138           }
4139         else
4140 #endif
4141         /* Not UTF mode */
4142           {
4143           for (i = min; i < max; i++)
4144             {
4145             if (eptr >= mb->end_subject)
4146               {
4147               SCHECK_PARTIAL();
4148               break;
4149               }
4150             if (fc == *eptr) break;
4151             eptr++;
4152             }
4153           if (possessive) continue;    /* No backtracking */
4154           for (;;)
4155             {
4156             if (eptr == pp) goto TAIL_RECURSE;
4157             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM35);
4158             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4159             eptr--;
4160             }
4161           }
4162         /* Control never gets here */
4163         }
4164       }
4165     /* Control never gets here */
4166 
4167     /* Match a single character type repeatedly; several different opcodes
4168     share code. This is very similar to the code for single characters, but we
4169     repeat it in the interests of efficiency. */
4170 
4171     case OP_TYPEEXACT:
4172     min = max = GET2(ecode, 1);
4173     minimize = TRUE;
4174     ecode += 1 + IMM2_SIZE;
4175     goto REPEATTYPE;
4176 
4177     case OP_TYPEUPTO:
4178     case OP_TYPEMINUPTO:
4179     min = 0;
4180     max = GET2(ecode, 1);
4181     minimize = *ecode == OP_TYPEMINUPTO;
4182     ecode += 1 + IMM2_SIZE;
4183     goto REPEATTYPE;
4184 
4185     case OP_TYPEPOSSTAR:
4186     possessive = TRUE;
4187     min = 0;
4188     max = INT_MAX;
4189     ecode++;
4190     goto REPEATTYPE;
4191 
4192     case OP_TYPEPOSPLUS:
4193     possessive = TRUE;
4194     min = 1;
4195     max = INT_MAX;
4196     ecode++;
4197     goto REPEATTYPE;
4198 
4199     case OP_TYPEPOSQUERY:
4200     possessive = TRUE;
4201     min = 0;
4202     max = 1;
4203     ecode++;
4204     goto REPEATTYPE;
4205 
4206     case OP_TYPEPOSUPTO:
4207     possessive = TRUE;
4208     min = 0;
4209     max = GET2(ecode, 1);
4210     ecode += 1 + IMM2_SIZE;
4211     goto REPEATTYPE;
4212 
4213     case OP_TYPESTAR:
4214     case OP_TYPEMINSTAR:
4215     case OP_TYPEPLUS:
4216     case OP_TYPEMINPLUS:
4217     case OP_TYPEQUERY:
4218     case OP_TYPEMINQUERY:
4219     c = *ecode++ - OP_TYPESTAR;
4220     minimize = (c & 1) != 0;
4221     min = rep_min[c];                 /* Pick up values from tables; */
4222     max = rep_max[c];                 /* zero for max => infinity */
4223     if (max == 0) max = INT_MAX;
4224 
4225     /* Common code for all repeated single character type matches. Note that
4226     in UTF-8 mode, '.' matches a character of any length, but for the other
4227     character types, the valid characters are all one-byte long. */
4228 
4229     REPEATTYPE:
4230     ctype = *ecode++;      /* Code for the character type */
4231 
4232 #ifdef SUPPORT_UNICODE
4233     if (ctype == OP_PROP || ctype == OP_NOTPROP)
4234       {
4235       prop_fail_result = ctype == OP_NOTPROP;
4236       prop_type = *ecode++;
4237       prop_value = *ecode++;
4238       }
4239     else prop_type = -1;
4240 #endif
4241 
4242     /* First, ensure the minimum number of matches are present. Use inline
4243     code for maximizing the speed, and do the type test once at the start
4244     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
4245     is tidier. Also separate the UCP code, which can be the same for both UTF-8
4246     and single-bytes. */
4247 
4248     if (min > 0)
4249       {
4250 #ifdef SUPPORT_UNICODE
4251       if (prop_type >= 0)
4252         {
4253         switch(prop_type)
4254           {
4255           case PT_ANY:
4256           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4257           for (i = 1; i <= min; i++)
4258             {
4259             if (eptr >= mb->end_subject)
4260               {
4261               SCHECK_PARTIAL();
4262               RRETURN(MATCH_NOMATCH);
4263               }
4264             GETCHARINCTEST(c, eptr);
4265             }
4266           break;
4267 
4268           case PT_LAMP:
4269           for (i = 1; i <= min; i++)
4270             {
4271             int chartype;
4272             if (eptr >= mb->end_subject)
4273               {
4274               SCHECK_PARTIAL();
4275               RRETURN(MATCH_NOMATCH);
4276               }
4277             GETCHARINCTEST(c, eptr);
4278             chartype = UCD_CHARTYPE(c);
4279             if ((chartype == ucp_Lu ||
4280                  chartype == ucp_Ll ||
4281                  chartype == ucp_Lt) == prop_fail_result)
4282               RRETURN(MATCH_NOMATCH);
4283             }
4284           break;
4285 
4286           case PT_GC:
4287           for (i = 1; i <= min; i++)
4288             {
4289             if (eptr >= mb->end_subject)
4290               {
4291               SCHECK_PARTIAL();
4292               RRETURN(MATCH_NOMATCH);
4293               }
4294             GETCHARINCTEST(c, eptr);
4295             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4296               RRETURN(MATCH_NOMATCH);
4297             }
4298           break;
4299 
4300           case PT_PC:
4301           for (i = 1; i <= min; i++)
4302             {
4303             if (eptr >= mb->end_subject)
4304               {
4305               SCHECK_PARTIAL();
4306               RRETURN(MATCH_NOMATCH);
4307               }
4308             GETCHARINCTEST(c, eptr);
4309             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4310               RRETURN(MATCH_NOMATCH);
4311             }
4312           break;
4313 
4314           case PT_SC:
4315           for (i = 1; i <= min; i++)
4316             {
4317             if (eptr >= mb->end_subject)
4318               {
4319               SCHECK_PARTIAL();
4320               RRETURN(MATCH_NOMATCH);
4321               }
4322             GETCHARINCTEST(c, eptr);
4323             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4324               RRETURN(MATCH_NOMATCH);
4325             }
4326           break;
4327 
4328           case PT_ALNUM:
4329           for (i = 1; i <= min; i++)
4330             {
4331             int category;
4332             if (eptr >= mb->end_subject)
4333               {
4334               SCHECK_PARTIAL();
4335               RRETURN(MATCH_NOMATCH);
4336               }
4337             GETCHARINCTEST(c, eptr);
4338             category = UCD_CATEGORY(c);
4339             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4340               RRETURN(MATCH_NOMATCH);
4341             }
4342           break;
4343 
4344           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
4345           which means that Perl space and POSIX space are now identical. PCRE
4346           was changed at release 8.34. */
4347 
4348           case PT_SPACE:    /* Perl space */
4349           case PT_PXSPACE:  /* POSIX space */
4350           for (i = 1; i <= min; i++)
4351             {
4352             if (eptr >= mb->end_subject)
4353               {
4354               SCHECK_PARTIAL();
4355               RRETURN(MATCH_NOMATCH);
4356               }
4357             GETCHARINCTEST(c, eptr);
4358             switch(c)
4359               {
4360               HSPACE_CASES:
4361               VSPACE_CASES:
4362               if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4363               break;
4364 
4365               default:
4366               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
4367                 RRETURN(MATCH_NOMATCH);
4368               break;
4369               }
4370             }
4371           break;
4372 
4373           case PT_WORD:
4374           for (i = 1; i <= min; i++)
4375             {
4376             int category;
4377             if (eptr >= mb->end_subject)
4378               {
4379               SCHECK_PARTIAL();
4380               RRETURN(MATCH_NOMATCH);
4381               }
4382             GETCHARINCTEST(c, eptr);
4383             category = UCD_CATEGORY(c);
4384             if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
4385                    == prop_fail_result)
4386               RRETURN(MATCH_NOMATCH);
4387             }
4388           break;
4389 
4390           case PT_CLIST:
4391           for (i = 1; i <= min; i++)
4392             {
4393             const uint32_t *cp;
4394             if (eptr >= mb->end_subject)
4395               {
4396               SCHECK_PARTIAL();
4397               RRETURN(MATCH_NOMATCH);
4398               }
4399             GETCHARINCTEST(c, eptr);
4400             cp = PRIV(ucd_caseless_sets) + prop_value;
4401             for (;;)
4402               {
4403               if (c < *cp)
4404                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
4405               if (c == *cp++)
4406                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
4407               }
4408             }
4409           break;
4410 
4411           case PT_UCNC:
4412           for (i = 1; i <= min; i++)
4413             {
4414             if (eptr >= mb->end_subject)
4415               {
4416               SCHECK_PARTIAL();
4417               RRETURN(MATCH_NOMATCH);
4418               }
4419             GETCHARINCTEST(c, eptr);
4420             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
4421                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
4422                  c >= 0xe000) == prop_fail_result)
4423               RRETURN(MATCH_NOMATCH);
4424             }
4425           break;
4426 
4427           /* This should not occur */
4428 
4429           default:
4430           RRETURN(PCRE2_ERROR_INTERNAL);
4431           }
4432         }
4433 
4434       /* Match extended Unicode sequences. We will get here only if the
4435       support is in the binary; otherwise a compile-time error occurs. */
4436 
4437       else if (ctype == OP_EXTUNI)
4438         {
4439         for (i = 1; i <= min; i++)
4440           {
4441           if (eptr >= mb->end_subject)
4442             {
4443             SCHECK_PARTIAL();
4444             RRETURN(MATCH_NOMATCH);
4445             }
4446           else
4447             {
4448             int lgb, rgb;
4449             GETCHARINCTEST(c, eptr);
4450             lgb = UCD_GRAPHBREAK(c);
4451            while (eptr < mb->end_subject)
4452               {
4453               int len = 1;
4454               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4455               rgb = UCD_GRAPHBREAK(c);
4456               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
4457               lgb = rgb;
4458               eptr += len;
4459               }
4460             }
4461           CHECK_PARTIAL();
4462           }
4463         }
4464 
4465       else
4466 #endif     /* SUPPORT_UNICODE */
4467 
4468 /* Handle all other cases in UTF mode (any code unit width) */
4469 
4470 #ifdef SUPPORT_UNICODE
4471       if (utf) switch(ctype)
4472         {
4473         case OP_ANY:
4474         for (i = 1; i <= min; i++)
4475           {
4476           if (eptr >= mb->end_subject)
4477             {
4478             SCHECK_PARTIAL();
4479             RRETURN(MATCH_NOMATCH);
4480             }
4481           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4482           if (mb->partial != 0 &&
4483               eptr + 1 >= mb->end_subject &&
4484               NLBLOCK->nltype == NLTYPE_FIXED &&
4485               NLBLOCK->nllen == 2 &&
4486               UCHAR21(eptr) == NLBLOCK->nl[0])
4487             {
4488             mb->hitend = TRUE;
4489             if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
4490             }
4491           eptr++;
4492           ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
4493           }
4494         break;
4495 
4496         case OP_ALLANY:
4497         for (i = 1; i <= min; i++)
4498           {
4499           if (eptr >= mb->end_subject)
4500             {
4501             SCHECK_PARTIAL();
4502             RRETURN(MATCH_NOMATCH);
4503             }
4504           eptr++;
4505           ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
4506           }
4507         break;
4508 
4509         case OP_ANYBYTE:
4510         if (eptr > mb->end_subject - min) RRETURN(MATCH_NOMATCH);
4511         eptr += min;
4512         break;
4513 
4514         case OP_ANYNL:
4515         for (i = 1; i <= min; i++)
4516           {
4517           if (eptr >= mb->end_subject)
4518             {
4519             SCHECK_PARTIAL();
4520             RRETURN(MATCH_NOMATCH);
4521             }
4522           GETCHARINC(c, eptr);
4523           switch(c)
4524             {
4525             default: RRETURN(MATCH_NOMATCH);
4526 
4527             case CHAR_CR:
4528             if (eptr < mb->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
4529             break;
4530 
4531             case CHAR_LF:
4532             break;
4533 
4534             case CHAR_VT:
4535             case CHAR_FF:
4536             case CHAR_NEL:
4537 #ifndef EBCDIC
4538             case 0x2028:
4539             case 0x2029:
4540 #endif  /* Not EBCDIC */
4541             if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
4542             break;
4543             }
4544           }
4545         break;
4546 
4547         case OP_NOT_HSPACE:
4548         for (i = 1; i <= min; i++)
4549           {
4550           if (eptr >= mb->end_subject)
4551             {
4552             SCHECK_PARTIAL();
4553             RRETURN(MATCH_NOMATCH);
4554             }
4555           GETCHARINC(c, eptr);
4556           switch(c)
4557             {
4558             HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
4559             default: break;
4560             }
4561           }
4562         break;
4563 
4564         case OP_HSPACE:
4565         for (i = 1; i <= min; i++)
4566           {
4567           if (eptr >= mb->end_subject)
4568             {
4569             SCHECK_PARTIAL();
4570             RRETURN(MATCH_NOMATCH);
4571             }
4572           GETCHARINC(c, eptr);
4573           switch(c)
4574             {
4575             HSPACE_CASES: break;  /* Byte and multibyte cases */
4576             default: RRETURN(MATCH_NOMATCH);
4577             }
4578           }
4579         break;
4580 
4581         case OP_NOT_VSPACE:
4582         for (i = 1; i <= min; i++)
4583           {
4584           if (eptr >= mb->end_subject)
4585             {
4586             SCHECK_PARTIAL();
4587             RRETURN(MATCH_NOMATCH);
4588             }
4589           GETCHARINC(c, eptr);
4590           switch(c)
4591             {
4592             VSPACE_CASES: RRETURN(MATCH_NOMATCH);
4593             default: break;
4594             }
4595           }
4596         break;
4597 
4598         case OP_VSPACE:
4599         for (i = 1; i <= min; i++)
4600           {
4601           if (eptr >= mb->end_subject)
4602             {
4603             SCHECK_PARTIAL();
4604             RRETURN(MATCH_NOMATCH);
4605             }
4606           GETCHARINC(c, eptr);
4607           switch(c)
4608             {
4609             VSPACE_CASES: break;
4610             default: RRETURN(MATCH_NOMATCH);
4611             }
4612           }
4613         break;
4614 
4615         case OP_NOT_DIGIT:
4616         for (i = 1; i <= min; i++)
4617           {
4618           if (eptr >= mb->end_subject)
4619             {
4620             SCHECK_PARTIAL();
4621             RRETURN(MATCH_NOMATCH);
4622             }
4623           GETCHARINC(c, eptr);
4624           if (c < 128 && (mb->ctypes[c] & ctype_digit) != 0)
4625             RRETURN(MATCH_NOMATCH);
4626           }
4627         break;
4628 
4629         case OP_DIGIT:
4630         for (i = 1; i <= min; i++)
4631           {
4632           uint32_t cc;
4633           if (eptr >= mb->end_subject)
4634             {
4635             SCHECK_PARTIAL();
4636             RRETURN(MATCH_NOMATCH);
4637             }
4638           cc = UCHAR21(eptr);
4639           if (cc >= 128 || (mb->ctypes[cc] & ctype_digit) == 0)
4640             RRETURN(MATCH_NOMATCH);
4641           eptr++;
4642           /* No need to skip more code units - a value below 128 is a single code unit */
4643           }
4644         break;
4645 
4646         case OP_NOT_WHITESPACE:
4647         for (i = 1; i <= min; i++)
4648           {
4649           uint32_t cc;
4650           if (eptr >= mb->end_subject)
4651             {
4652             SCHECK_PARTIAL();
4653             RRETURN(MATCH_NOMATCH);
4654             }
4655           cc = UCHAR21(eptr);
4656           if (cc < 128 && (mb->ctypes[cc] & ctype_space) != 0)
4657             RRETURN(MATCH_NOMATCH);
4658           eptr++;
4659           ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
4660           }
4661         break;
4662 
4663         case OP_WHITESPACE:
4664         for (i = 1; i <= min; i++)
4665           {
4666           uint32_t cc;
4667           if (eptr >= mb->end_subject)
4668             {
4669             SCHECK_PARTIAL();
4670             RRETURN(MATCH_NOMATCH);
4671             }
4672           cc = UCHAR21(eptr);
4673           if (cc >= 128 || (mb->ctypes[cc] & ctype_space) == 0)
4674             RRETURN(MATCH_NOMATCH);
4675           eptr++;
4676           /* No need to skip more code units - a value below 128 is a single code unit */
4677           }
4678         break;
4679 
4680         case OP_NOT_WORDCHAR:
4681         for (i = 1; i <= min; i++)
4682           {
4683           uint32_t cc;
4684           if (eptr >= mb->end_subject)
4685             {
4686             SCHECK_PARTIAL();
4687             RRETURN(MATCH_NOMATCH);
4688             }
4689           cc = UCHAR21(eptr);
4690           if (cc < 128 && (mb->ctypes[cc] & ctype_word) != 0)
4691             RRETURN(MATCH_NOMATCH);
4692           eptr++;
4693           ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
4694           }
4695         break;
4696 
4697         case OP_WORDCHAR:
4698         for (i = 1; i <= min; i++)
4699           {
4700           uint32_t cc;
4701           if (eptr >= mb->end_subject)
4702             {
4703             SCHECK_PARTIAL();
4704             RRETURN(MATCH_NOMATCH);
4705             }
4706           cc = UCHAR21(eptr);
4707           if (cc >= 128 || (mb->ctypes[cc] & ctype_word) == 0)
4708             RRETURN(MATCH_NOMATCH);
4709           eptr++;
4710           /* No need to skip more code units - a value below 128 is a single code unit */
4711           }
4712         break;
4713 
4714         default:
4715         RRETURN(PCRE2_ERROR_INTERNAL);
4716         }  /* End switch(ctype) */
4717 
4718       else
4719 #endif     /* SUPPORT_UNICODE */
4720 
4721       /* Code for the non-UTF case (any code unit width) for minimum matching
4722       of operators other than OP_PROP and OP_NOTPROP. */
4723 
4724       switch(ctype)
4725         {
4726         case OP_ANY:
4727         for (i = 1; i <= min; i++)
4728           {
4729           if (eptr >= mb->end_subject)
4730             {
4731             SCHECK_PARTIAL();
4732             RRETURN(MATCH_NOMATCH);
4733             }
4734           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4735           if (mb->partial != 0 &&
4736               eptr + 1 >= mb->end_subject &&
4737               NLBLOCK->nltype == NLTYPE_FIXED &&
4738               NLBLOCK->nllen == 2 &&
4739               *eptr == NLBLOCK->nl[0])
4740             {
4741             mb->hitend = TRUE;
4742             if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
4743             }
4744           eptr++;
4745           }
4746         break;
4747 
4748         case OP_ALLANY:
4749         if (eptr > mb->end_subject - min)
4750           {
4751           SCHECK_PARTIAL();
4752           RRETURN(MATCH_NOMATCH);
4753           }
4754         eptr += min;
4755         break;
4756 
4757         case OP_ANYBYTE:
4758         if (eptr > mb->end_subject - min)
4759           {
4760           SCHECK_PARTIAL();
4761           RRETURN(MATCH_NOMATCH);
4762           }
4763         eptr += min;
4764         break;
4765 
4766         case OP_ANYNL:
4767         for (i = 1; i <= min; i++)
4768           {
4769           if (eptr >= mb->end_subject)
4770             {
4771             SCHECK_PARTIAL();
4772             RRETURN(MATCH_NOMATCH);
4773             }
4774           switch(*eptr++)
4775             {
4776             default: RRETURN(MATCH_NOMATCH);
4777 
4778             case CHAR_CR:
4779             if (eptr < mb->end_subject && *eptr == CHAR_LF) eptr++;
4780             break;
4781 
4782             case CHAR_LF:
4783             break;
4784 
4785             case CHAR_VT:
4786             case CHAR_FF:
4787             case CHAR_NEL:
4788 #if PCRE2_CODE_UNIT_WIDTH != 8
4789             case 0x2028:
4790             case 0x2029:
4791 #endif
4792             if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
4793             break;
4794             }
4795           }
4796         break;
4797 
4798         case OP_NOT_HSPACE:
4799         for (i = 1; i <= min; i++)
4800           {
4801           if (eptr >= mb->end_subject)
4802             {
4803             SCHECK_PARTIAL();
4804             RRETURN(MATCH_NOMATCH);
4805             }
4806           switch(*eptr++)
4807             {
4808             default: break;
4809             HSPACE_BYTE_CASES:
4810 #if PCRE2_CODE_UNIT_WIDTH != 8
4811             HSPACE_MULTIBYTE_CASES:
4812 #endif
4813             RRETURN(MATCH_NOMATCH);
4814             }
4815           }
4816         break;
4817 
4818         case OP_HSPACE:
4819         for (i = 1; i <= min; i++)
4820           {
4821           if (eptr >= mb->end_subject)
4822             {
4823             SCHECK_PARTIAL();
4824             RRETURN(MATCH_NOMATCH);
4825             }
4826           switch(*eptr++)
4827             {
4828             default: RRETURN(MATCH_NOMATCH);
4829             HSPACE_BYTE_CASES:
4830 #if PCRE2_CODE_UNIT_WIDTH != 8
4831             HSPACE_MULTIBYTE_CASES:
4832 #endif
4833             break;
4834             }
4835           }
4836         break;
4837 
4838         case OP_NOT_VSPACE:
4839         for (i = 1; i <= min; i++)
4840           {
4841           if (eptr >= mb->end_subject)
4842             {
4843             SCHECK_PARTIAL();
4844             RRETURN(MATCH_NOMATCH);
4845             }
4846           switch(*eptr++)
4847             {
4848             VSPACE_BYTE_CASES:
4849 #if PCRE2_CODE_UNIT_WIDTH != 8
4850             VSPACE_MULTIBYTE_CASES:
4851 #endif
4852             RRETURN(MATCH_NOMATCH);
4853             default: break;
4854             }
4855           }
4856         break;
4857 
4858         case OP_VSPACE:
4859         for (i = 1; i <= min; i++)
4860           {
4861           if (eptr >= mb->end_subject)
4862             {
4863             SCHECK_PARTIAL();
4864             RRETURN(MATCH_NOMATCH);
4865             }
4866           switch(*eptr++)
4867             {
4868             default: RRETURN(MATCH_NOMATCH);
4869             VSPACE_BYTE_CASES:
4870 #if PCRE2_CODE_UNIT_WIDTH != 8
4871             VSPACE_MULTIBYTE_CASES:
4872 #endif
4873             break;
4874             }
4875           }
4876         break;
4877 
4878         case OP_NOT_DIGIT:
4879         for (i = 1; i <= min; i++)
4880           {
4881           if (eptr >= mb->end_subject)
4882             {
4883             SCHECK_PARTIAL();
4884             RRETURN(MATCH_NOMATCH);
4885             }
4886           if (MAX_255(*eptr) && (mb->ctypes[*eptr] & ctype_digit) != 0)
4887             RRETURN(MATCH_NOMATCH);
4888           eptr++;
4889           }
4890         break;
4891 
4892         case OP_DIGIT:
4893         for (i = 1; i <= min; i++)
4894           {
4895           if (eptr >= mb->end_subject)
4896             {
4897             SCHECK_PARTIAL();
4898             RRETURN(MATCH_NOMATCH);
4899             }
4900           if (!MAX_255(*eptr) || (mb->ctypes[*eptr] & ctype_digit) == 0)
4901             RRETURN(MATCH_NOMATCH);
4902           eptr++;
4903           }
4904         break;
4905 
4906         case OP_NOT_WHITESPACE:
4907         for (i = 1; i <= min; i++)
4908           {
4909           if (eptr >= mb->end_subject)
4910             {
4911             SCHECK_PARTIAL();
4912             RRETURN(MATCH_NOMATCH);
4913             }
4914           if (MAX_255(*eptr) && (mb->ctypes[*eptr] & ctype_space) != 0)
4915             RRETURN(MATCH_NOMATCH);
4916           eptr++;
4917           }
4918         break;
4919 
4920         case OP_WHITESPACE:
4921         for (i = 1; i <= min; i++)
4922           {
4923           if (eptr >= mb->end_subject)
4924             {
4925             SCHECK_PARTIAL();
4926             RRETURN(MATCH_NOMATCH);
4927             }
4928           if (!MAX_255(*eptr) || (mb->ctypes[*eptr] & ctype_space) == 0)
4929             RRETURN(MATCH_NOMATCH);
4930           eptr++;
4931           }
4932         break;
4933 
4934         case OP_NOT_WORDCHAR:
4935         for (i = 1; i <= min; i++)
4936           {
4937           if (eptr >= mb->end_subject)
4938             {
4939             SCHECK_PARTIAL();
4940             RRETURN(MATCH_NOMATCH);
4941             }
4942           if (MAX_255(*eptr) && (mb->ctypes[*eptr] & ctype_word) != 0)
4943             RRETURN(MATCH_NOMATCH);
4944           eptr++;
4945           }
4946         break;
4947 
4948         case OP_WORDCHAR:
4949         for (i = 1; i <= min; i++)
4950           {
4951           if (eptr >= mb->end_subject)
4952             {
4953             SCHECK_PARTIAL();
4954             RRETURN(MATCH_NOMATCH);
4955             }
4956           if (!MAX_255(*eptr) || (mb->ctypes[*eptr] & ctype_word) == 0)
4957             RRETURN(MATCH_NOMATCH);
4958           eptr++;
4959           }
4960         break;
4961 
4962         default:
4963         RRETURN(PCRE2_ERROR_INTERNAL);
4964         }
4965       }
4966 
4967     /* If min = max, continue at the same level without recursing */
4968 
4969     if (min == max) continue;
4970 
4971     /* If minimizing, we have to test the rest of the pattern before each
4972     subsequent match. Again, separate the UTF case for speed, and also
4973     separate the UCP cases. */
4974 
4975     if (minimize)
4976       {
4977 #ifdef SUPPORT_UNICODE
4978       if (prop_type >= 0)
4979         {
4980         switch(prop_type)
4981           {
4982           case PT_ANY:
4983           for (fi = min;; fi++)
4984             {
4985             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM36);
4986             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4987             if (fi >= max) RRETURN(MATCH_NOMATCH);
4988             if (eptr >= mb->end_subject)
4989               {
4990               SCHECK_PARTIAL();
4991               RRETURN(MATCH_NOMATCH);
4992               }
4993             GETCHARINCTEST(c, eptr);
4994             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4995             }
4996           /* Control never gets here */
4997 
4998           case PT_LAMP:
4999           for (fi = min;; fi++)
5000             {
5001             int chartype;
5002             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM37);
5003             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5004             if (fi >= max) RRETURN(MATCH_NOMATCH);
5005             if (eptr >= mb->end_subject)
5006               {
5007               SCHECK_PARTIAL();
5008               RRETURN(MATCH_NOMATCH);
5009               }
5010             GETCHARINCTEST(c, eptr);
5011             chartype = UCD_CHARTYPE(c);
5012             if ((chartype == ucp_Lu ||
5013                  chartype == ucp_Ll ||
5014                  chartype == ucp_Lt) == prop_fail_result)
5015               RRETURN(MATCH_NOMATCH);
5016             }
5017           /* Control never gets here */
5018 
5019           case PT_GC:
5020           for (fi = min;; fi++)
5021             {
5022             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM38);
5023             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5024             if (fi >= max) RRETURN(MATCH_NOMATCH);
5025             if (eptr >= mb->end_subject)
5026               {
5027               SCHECK_PARTIAL();
5028               RRETURN(MATCH_NOMATCH);
5029               }
5030             GETCHARINCTEST(c, eptr);
5031             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
5032               RRETURN(MATCH_NOMATCH);
5033             }
5034           /* Control never gets here */
5035 
5036           case PT_PC:
5037           for (fi = min;; fi++)
5038             {
5039             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM39);
5040             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5041             if (fi >= max) RRETURN(MATCH_NOMATCH);
5042             if (eptr >= mb->end_subject)
5043               {
5044               SCHECK_PARTIAL();
5045               RRETURN(MATCH_NOMATCH);
5046               }
5047             GETCHARINCTEST(c, eptr);
5048             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
5049               RRETURN(MATCH_NOMATCH);
5050             }
5051           /* Control never gets here */
5052 
5053           case PT_SC:
5054           for (fi = min;; fi++)
5055             {
5056             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM40);
5057             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5058             if (fi >= max) RRETURN(MATCH_NOMATCH);
5059             if (eptr >= mb->end_subject)
5060               {
5061               SCHECK_PARTIAL();
5062               RRETURN(MATCH_NOMATCH);
5063               }
5064             GETCHARINCTEST(c, eptr);
5065             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
5066               RRETURN(MATCH_NOMATCH);
5067             }
5068           /* Control never gets here */
5069 
5070           case PT_ALNUM:
5071           for (fi = min;; fi++)
5072             {
5073             int category;
5074             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM59);
5075             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5076             if (fi >= max) RRETURN(MATCH_NOMATCH);
5077             if (eptr >= mb->end_subject)
5078               {
5079               SCHECK_PARTIAL();
5080               RRETURN(MATCH_NOMATCH);
5081               }
5082             GETCHARINCTEST(c, eptr);
5083             category = UCD_CATEGORY(c);
5084             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
5085               RRETURN(MATCH_NOMATCH);
5086             }
5087           /* Control never gets here */
5088 
5089           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
5090           which means that Perl space and POSIX space are now identical. PCRE
5091           was changed at release 8.34. */
5092 
5093           case PT_SPACE:    /* Perl space */
5094           case PT_PXSPACE:  /* POSIX space */
5095           for (fi = min;; fi++)
5096             {
5097             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM61);
5098             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5099             if (fi >= max) RRETURN(MATCH_NOMATCH);
5100             if (eptr >= mb->end_subject)
5101               {
5102               SCHECK_PARTIAL();
5103               RRETURN(MATCH_NOMATCH);
5104               }
5105             GETCHARINCTEST(c, eptr);
5106             switch(c)
5107               {
5108               HSPACE_CASES:
5109               VSPACE_CASES:
5110               if (prop_fail_result) RRETURN(MATCH_NOMATCH);
5111               break;
5112 
5113               default:
5114               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
5115                 RRETURN(MATCH_NOMATCH);
5116               break;
5117               }
5118             }
5119           /* Control never gets here */
5120 
5121           case PT_WORD:
5122           for (fi = min;; fi++)
5123             {
5124             int category;
5125             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM62);
5126             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5127             if (fi >= max) RRETURN(MATCH_NOMATCH);
5128             if (eptr >= mb->end_subject)
5129               {
5130               SCHECK_PARTIAL();
5131               RRETURN(MATCH_NOMATCH);
5132               }
5133             GETCHARINCTEST(c, eptr);
5134             category = UCD_CATEGORY(c);
5135             if ((category == ucp_L ||
5136                  category == ucp_N ||
5137                  c == CHAR_UNDERSCORE)
5138                    == prop_fail_result)
5139               RRETURN(MATCH_NOMATCH);
5140             }
5141           /* Control never gets here */
5142 
5143           case PT_CLIST:
5144           for (fi = min;; fi++)
5145             {
5146             const uint32_t *cp;
5147             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM67);
5148             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5149             if (fi >= max) RRETURN(MATCH_NOMATCH);
5150             if (eptr >= mb->end_subject)
5151               {
5152               SCHECK_PARTIAL();
5153               RRETURN(MATCH_NOMATCH);
5154               }
5155             GETCHARINCTEST(c, eptr);
5156             cp = PRIV(ucd_caseless_sets) + prop_value;
5157             for (;;)
5158               {
5159               if (c < *cp)
5160                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
5161               if (c == *cp++)
5162                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
5163               }
5164             }
5165           /* Control never gets here */
5166 
5167           case PT_UCNC:
5168           for (fi = min;; fi++)
5169             {
5170             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM60);
5171             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5172             if (fi >= max) RRETURN(MATCH_NOMATCH);
5173             if (eptr >= mb->end_subject)
5174               {
5175               SCHECK_PARTIAL();
5176               RRETURN(MATCH_NOMATCH);
5177               }
5178             GETCHARINCTEST(c, eptr);
5179             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5180                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5181                  c >= 0xe000) == prop_fail_result)
5182               RRETURN(MATCH_NOMATCH);
5183             }
5184           /* Control never gets here */
5185 
5186           /* This should never occur */
5187           default:
5188           RRETURN(PCRE2_ERROR_INTERNAL);
5189           }
5190         }
5191 
5192       /* Match extended Unicode sequences. We will get here only if the
5193       support is in the binary; otherwise a compile-time error occurs. */
5194 
5195       else if (ctype == OP_EXTUNI)
5196         {
5197         for (fi = min;; fi++)
5198           {
5199           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM41);
5200           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5201           if (fi >= max) RRETURN(MATCH_NOMATCH);
5202           if (eptr >= mb->end_subject)
5203             {
5204             SCHECK_PARTIAL();
5205             RRETURN(MATCH_NOMATCH);
5206             }
5207           else
5208             {
5209             int lgb, rgb;
5210             GETCHARINCTEST(c, eptr);
5211             lgb = UCD_GRAPHBREAK(c);
5212             while (eptr < mb->end_subject)
5213               {
5214               int len = 1;
5215               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5216               rgb = UCD_GRAPHBREAK(c);
5217               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5218               lgb = rgb;
5219               eptr += len;
5220               }
5221             }
5222           CHECK_PARTIAL();
5223           }
5224         }
5225       else
5226 #endif     /* SUPPORT_UNICODE */
5227 
5228 #ifdef SUPPORT_UNICODE
5229       if (utf)
5230         {
5231         for (fi = min;; fi++)
5232           {
5233           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM42);
5234           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5235           if (fi >= max) RRETURN(MATCH_NOMATCH);
5236           if (eptr >= mb->end_subject)
5237             {
5238             SCHECK_PARTIAL();
5239             RRETURN(MATCH_NOMATCH);
5240             }
5241           if (ctype == OP_ANY && IS_NEWLINE(eptr))
5242             RRETURN(MATCH_NOMATCH);
5243           GETCHARINC(c, eptr);
5244           switch(ctype)
5245             {
5246             case OP_ANY:               /* This is the non-NL case */
5247             if (mb->partial != 0 &&    /* Take care with CRLF partial */
5248                 eptr >= mb->end_subject &&
5249                 NLBLOCK->nltype == NLTYPE_FIXED &&
5250                 NLBLOCK->nllen == 2 &&
5251                 c == NLBLOCK->nl[0])
5252               {
5253               mb->hitend = TRUE;
5254               if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
5255               }
5256             break;
5257 
5258             case OP_ALLANY:
5259             case OP_ANYBYTE:
5260             break;
5261 
5262             case OP_ANYNL:
5263             switch(c)
5264               {
5265               default: RRETURN(MATCH_NOMATCH);
5266               case CHAR_CR:
5267               if (eptr < mb->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
5268               break;
5269 
5270               case CHAR_LF:
5271               break;
5272 
5273               case CHAR_VT:
5274               case CHAR_FF:
5275               case CHAR_NEL:
5276 #ifndef EBCDIC
5277               case 0x2028:
5278               case 0x2029:
5279 #endif  /* Not EBCDIC */
5280               if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
5281               break;
5282               }
5283             break;
5284 
5285             case OP_NOT_HSPACE:
5286             switch(c)
5287               {
5288               HSPACE_CASES: RRETURN(MATCH_NOMATCH);
5289               default: break;
5290               }
5291             break;
5292 
5293             case OP_HSPACE:
5294             switch(c)
5295               {
5296               HSPACE_CASES: break;
5297               default: RRETURN(MATCH_NOMATCH);
5298               }
5299             break;
5300 
5301             case OP_NOT_VSPACE:
5302             switch(c)
5303               {
5304               VSPACE_CASES: RRETURN(MATCH_NOMATCH);
5305               default: break;
5306               }
5307             break;
5308 
5309             case OP_VSPACE:
5310             switch(c)
5311               {
5312               VSPACE_CASES: break;
5313               default: RRETURN(MATCH_NOMATCH);
5314               }
5315             break;
5316 
5317             case OP_NOT_DIGIT:
5318             if (c < 256 && (mb->ctypes[c] & ctype_digit) != 0)
5319               RRETURN(MATCH_NOMATCH);
5320             break;
5321 
5322             case OP_DIGIT:
5323             if (c >= 256 || (mb->ctypes[c] & ctype_digit) == 0)
5324               RRETURN(MATCH_NOMATCH);
5325             break;
5326 
5327             case OP_NOT_WHITESPACE:
5328             if (c < 256 && (mb->ctypes[c] & ctype_space) != 0)
5329               RRETURN(MATCH_NOMATCH);
5330             break;
5331 
5332             case OP_WHITESPACE:
5333             if (c >= 256 || (mb->ctypes[c] & ctype_space) == 0)
5334               RRETURN(MATCH_NOMATCH);
5335             break;
5336 
5337             case OP_NOT_WORDCHAR:
5338             if (c < 256 && (mb->ctypes[c] & ctype_word) != 0)
5339               RRETURN(MATCH_NOMATCH);
5340             break;
5341 
5342             case OP_WORDCHAR:
5343             if (c >= 256 || (mb->ctypes[c] & ctype_word) == 0)
5344               RRETURN(MATCH_NOMATCH);
5345             break;
5346 
5347             default:
5348             RRETURN(PCRE2_ERROR_INTERNAL);
5349             }
5350           }
5351         }
5352       else
5353 #endif
5354       /* Not UTF mode */
5355         {
5356         for (fi = min;; fi++)
5357           {
5358           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM43);
5359           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5360           if (fi >= max) RRETURN(MATCH_NOMATCH);
5361           if (eptr >= mb->end_subject)
5362             {
5363             SCHECK_PARTIAL();
5364             RRETURN(MATCH_NOMATCH);
5365             }
5366           if (ctype == OP_ANY && IS_NEWLINE(eptr))
5367             RRETURN(MATCH_NOMATCH);
5368           c = *eptr++;
5369           switch(ctype)
5370             {
5371             case OP_ANY:               /* This is the non-NL case */
5372             if (mb->partial != 0 &&    /* Take care with CRLF partial */
5373                 eptr >= mb->end_subject &&
5374                 NLBLOCK->nltype == NLTYPE_FIXED &&
5375                 NLBLOCK->nllen == 2 &&
5376                 c == NLBLOCK->nl[0])
5377               {
5378               mb->hitend = TRUE;
5379               if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
5380               }
5381             break;
5382 
5383             case OP_ALLANY:
5384             case OP_ANYBYTE:
5385             break;
5386 
5387             case OP_ANYNL:
5388             switch(c)
5389               {
5390               default: RRETURN(MATCH_NOMATCH);
5391               case CHAR_CR:
5392               if (eptr < mb->end_subject && *eptr == CHAR_LF) eptr++;
5393               break;
5394 
5395               case CHAR_LF:
5396               break;
5397 
5398               case CHAR_VT:
5399               case CHAR_FF:
5400               case CHAR_NEL:
5401 #if PCRE2_CODE_UNIT_WIDTH != 8
5402               case 0x2028:
5403               case 0x2029:
5404 #endif
5405               if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
5406               break;
5407               }
5408             break;
5409 
5410             case OP_NOT_HSPACE:
5411             switch(c)
5412               {
5413               default: break;
5414               HSPACE_BYTE_CASES:
5415 #if PCRE2_CODE_UNIT_WIDTH != 8
5416               HSPACE_MULTIBYTE_CASES:
5417 #endif
5418               RRETURN(MATCH_NOMATCH);
5419               }
5420             break;
5421 
5422             case OP_HSPACE:
5423             switch(c)
5424               {
5425               default: RRETURN(MATCH_NOMATCH);
5426               HSPACE_BYTE_CASES:
5427 #if PCRE2_CODE_UNIT_WIDTH != 8
5428               HSPACE_MULTIBYTE_CASES:
5429 #endif
5430               break;
5431               }
5432             break;
5433 
5434             case OP_NOT_VSPACE:
5435             switch(c)
5436               {
5437               default: break;
5438               VSPACE_BYTE_CASES:
5439 #if PCRE2_CODE_UNIT_WIDTH != 8
5440               VSPACE_MULTIBYTE_CASES:
5441 #endif
5442               RRETURN(MATCH_NOMATCH);
5443               }
5444             break;
5445 
5446             case OP_VSPACE:
5447             switch(c)
5448               {
5449               default: RRETURN(MATCH_NOMATCH);
5450               VSPACE_BYTE_CASES:
5451 #if PCRE2_CODE_UNIT_WIDTH != 8
5452               VSPACE_MULTIBYTE_CASES:
5453 #endif
5454               break;
5455               }
5456             break;
5457 
5458             case OP_NOT_DIGIT:
5459             if (MAX_255(c) && (mb->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
5460             break;
5461 
5462             case OP_DIGIT:
5463             if (!MAX_255(c) || (mb->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
5464             break;
5465 
5466             case OP_NOT_WHITESPACE:
5467             if (MAX_255(c) && (mb->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
5468             break;
5469 
5470             case OP_WHITESPACE:
5471             if (!MAX_255(c) || (mb->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
5472             break;
5473 
5474             case OP_NOT_WORDCHAR:
5475             if (MAX_255(c) && (mb->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
5476             break;
5477 
5478             case OP_WORDCHAR:
5479             if (!MAX_255(c) || (mb->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
5480             break;
5481 
5482             default:
5483             RRETURN(PCRE2_ERROR_INTERNAL);
5484             }
5485           }
5486         }
5487       /* Control never gets here */
5488       }
5489 
5490     /* If maximizing, it is worth using inline code for speed, doing the type
5491     test once at the start (i.e. keep it out of the loop). Again, keep the
5492     UTF-8 and UCP stuff separate. */
5493 
5494     else
5495       {
5496       pp = eptr;  /* Remember where we started */
5497 
5498 #ifdef SUPPORT_UNICODE
5499       if (prop_type >= 0)
5500         {
5501         switch(prop_type)
5502           {
5503           case PT_ANY:
5504           for (i = min; i < max; i++)
5505             {
5506             int len = 1;
5507             if (eptr >= mb->end_subject)
5508               {
5509               SCHECK_PARTIAL();
5510               break;
5511               }
5512             GETCHARLENTEST(c, eptr, len);
5513             if (prop_fail_result) break;
5514             eptr+= len;
5515             }
5516           break;
5517 
5518           case PT_LAMP:
5519           for (i = min; i < max; i++)
5520             {
5521             int chartype;
5522             int len = 1;
5523             if (eptr >= mb->end_subject)
5524               {
5525               SCHECK_PARTIAL();
5526               break;
5527               }
5528             GETCHARLENTEST(c, eptr, len);
5529             chartype = UCD_CHARTYPE(c);
5530             if ((chartype == ucp_Lu ||
5531                  chartype == ucp_Ll ||
5532                  chartype == ucp_Lt) == prop_fail_result)
5533               break;
5534             eptr+= len;
5535             }
5536           break;
5537 
5538           case PT_GC:
5539           for (i = min; i < max; i++)
5540             {
5541             int len = 1;
5542             if (eptr >= mb->end_subject)
5543               {
5544               SCHECK_PARTIAL();
5545               break;
5546               }
5547             GETCHARLENTEST(c, eptr, len);
5548             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
5549             eptr+= len;
5550             }
5551           break;
5552 
5553           case PT_PC:
5554           for (i = min; i < max; i++)
5555             {
5556             int len = 1;
5557             if (eptr >= mb->end_subject)
5558               {
5559               SCHECK_PARTIAL();
5560               break;
5561               }
5562             GETCHARLENTEST(c, eptr, len);
5563             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
5564             eptr+= len;
5565             }
5566           break;
5567 
5568           case PT_SC:
5569           for (i = min; i < max; i++)
5570             {
5571             int len = 1;
5572             if (eptr >= mb->end_subject)
5573               {
5574               SCHECK_PARTIAL();
5575               break;
5576               }
5577             GETCHARLENTEST(c, eptr, len);
5578             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
5579             eptr+= len;
5580             }
5581           break;
5582 
5583           case PT_ALNUM:
5584           for (i = min; i < max; i++)
5585             {
5586             int category;
5587             int len = 1;
5588             if (eptr >= mb->end_subject)
5589               {
5590               SCHECK_PARTIAL();
5591               break;
5592               }
5593             GETCHARLENTEST(c, eptr, len);
5594             category = UCD_CATEGORY(c);
5595             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
5596               break;
5597             eptr+= len;
5598             }
5599           break;
5600 
5601           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
5602           which means that Perl space and POSIX space are now identical. PCRE
5603           was changed at release 8.34. */
5604 
5605           case PT_SPACE:    /* Perl space */
5606           case PT_PXSPACE:  /* POSIX space */
5607           for (i = min; i < max; i++)
5608             {
5609             int len = 1;
5610             if (eptr >= mb->end_subject)
5611               {
5612               SCHECK_PARTIAL();
5613               break;
5614               }
5615             GETCHARLENTEST(c, eptr, len);
5616             switch(c)
5617               {
5618               HSPACE_CASES:
5619               VSPACE_CASES:
5620               if (prop_fail_result) goto ENDLOOP99;  /* Break the loop */
5621               break;
5622 
5623               default:
5624               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
5625                 goto ENDLOOP99;   /* Break the loop */
5626               break;
5627               }
5628             eptr+= len;
5629             }
5630           ENDLOOP99:
5631           break;
5632 
5633           case PT_WORD:
5634           for (i = min; i < max; i++)
5635             {
5636             int category;
5637             int len = 1;
5638             if (eptr >= mb->end_subject)
5639               {
5640               SCHECK_PARTIAL();
5641               break;
5642               }
5643             GETCHARLENTEST(c, eptr, len);
5644             category = UCD_CATEGORY(c);
5645             if ((category == ucp_L || category == ucp_N ||
5646                  c == CHAR_UNDERSCORE) == prop_fail_result)
5647               break;
5648             eptr+= len;
5649             }
5650           break;
5651 
5652           case PT_CLIST:
5653           for (i = min; i < max; i++)
5654             {
5655             const uint32_t *cp;
5656             int len = 1;
5657             if (eptr >= mb->end_subject)
5658               {
5659               SCHECK_PARTIAL();
5660               break;
5661               }
5662             GETCHARLENTEST(c, eptr, len);
5663             cp = PRIV(ucd_caseless_sets) + prop_value;
5664             for (;;)
5665               {
5666               if (c < *cp)
5667                 { if (prop_fail_result) break; else goto GOT_MAX; }
5668               if (c == *cp++)
5669                 { if (prop_fail_result) goto GOT_MAX; else break; }
5670               }
5671             eptr += len;
5672             }
5673           GOT_MAX:
5674           break;
5675 
5676           case PT_UCNC:
5677           for (i = min; i < max; i++)
5678             {
5679             int len = 1;
5680             if (eptr >= mb->end_subject)
5681               {
5682               SCHECK_PARTIAL();
5683               break;
5684               }
5685             GETCHARLENTEST(c, eptr, len);
5686             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5687                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5688                  c >= 0xe000) == prop_fail_result)
5689               break;
5690             eptr += len;
5691             }
5692           break;
5693 
5694           default:
5695           RRETURN(PCRE2_ERROR_INTERNAL);
5696           }
5697 
5698         /* eptr is now past the end of the maximum run */
5699 
5700         if (possessive) continue;    /* No backtracking */
5701 
5702         /* After \C in UTF mode, pp might be in the middle of a Unicode
5703         character. Use <= pp to ensure backtracking doesn't go too far. */
5704 
5705         for(;;)
5706           {
5707           if (eptr <= pp) goto TAIL_RECURSE;
5708           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM44);
5709           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5710           eptr--;
5711           if (utf) BACKCHAR(eptr);
5712           }
5713         }
5714 
5715       /* Match extended Unicode grapheme clusters. We will get here only if the
5716       support is in the binary; otherwise a compile-time error occurs. */
5717 
5718       else if (ctype == OP_EXTUNI)
5719         {
5720         for (i = min; i < max; i++)
5721           {
5722           if (eptr >= mb->end_subject)
5723             {
5724             SCHECK_PARTIAL();
5725             break;
5726             }
5727           else
5728             {
5729             int lgb, rgb;
5730             GETCHARINCTEST(c, eptr);
5731             lgb = UCD_GRAPHBREAK(c);
5732             while (eptr < mb->end_subject)
5733               {
5734               int len = 1;
5735               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5736               rgb = UCD_GRAPHBREAK(c);
5737               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5738               lgb = rgb;
5739               eptr += len;
5740               }
5741             }
5742           CHECK_PARTIAL();
5743           }
5744 
5745         /* eptr is now past the end of the maximum run */
5746 
5747         if (possessive) continue;    /* No backtracking */
5748 
5749         /* We use <= pp rather than == pp to detect the start of the run while
5750         backtracking because the use of \C in UTF mode can cause BACKCHAR to
5751         move back past pp. This is just palliative; the use of \C in UTF mode
5752         is fraught with danger. */
5753 
5754         for(;;)
5755           {
5756           int lgb, rgb;
5757           PCRE2_SPTR fptr;
5758 
5759           if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
5760           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM45);
5761           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5762 
5763           /* Backtracking over an extended grapheme cluster involves inspecting
5764           the previous two characters (if present) to see if a break is
5765           permitted between them. */
5766 
5767           eptr--;
5768           if (!utf) c = *eptr; else
5769             {
5770             BACKCHAR(eptr);
5771             GETCHAR(c, eptr);
5772             }
5773           rgb = UCD_GRAPHBREAK(c);
5774 
5775           for (;;)
5776             {
5777             if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
5778             fptr = eptr - 1;
5779             if (!utf) c = *fptr; else
5780               {
5781               BACKCHAR(fptr);
5782               GETCHAR(c, fptr);
5783               }
5784             lgb = UCD_GRAPHBREAK(c);
5785             if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5786             eptr = fptr;
5787             rgb = lgb;
5788             }
5789           }
5790         }
5791 
5792       else
5793 #endif   /* SUPPORT_UNICODE */
5794 
5795 #ifdef SUPPORT_UNICODE
5796       if (utf)
5797         {
5798         switch(ctype)
5799           {
5800           case OP_ANY:
5801           for (i = min; i < max; i++)
5802             {
5803             if (eptr >= mb->end_subject)
5804               {
5805               SCHECK_PARTIAL();
5806               break;
5807               }
5808             if (IS_NEWLINE(eptr)) break;
5809             if (mb->partial != 0 &&    /* Take care with CRLF partial */
5810                 eptr + 1 >= mb->end_subject &&
5811                 NLBLOCK->nltype == NLTYPE_FIXED &&
5812                 NLBLOCK->nllen == 2 &&
5813                 UCHAR21(eptr) == NLBLOCK->nl[0])
5814               {
5815               mb->hitend = TRUE;
5816               if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
5817               }
5818             eptr++;
5819             ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
5820             }
5821           break;
5822 
5823           case OP_ALLANY:
5824           if (max < INT_MAX)
5825             {
5826             for (i = min; i < max; i++)
5827               {
5828               if (eptr >= mb->end_subject)
5829                 {
5830                 SCHECK_PARTIAL();
5831                 break;
5832                 }
5833               eptr++;
5834               ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
5835               }
5836             }
5837           else
5838             {
5839             eptr = mb->end_subject;   /* Unlimited UTF-8 repeat */
5840             SCHECK_PARTIAL();
5841             }
5842           break;
5843 
5844           /* The byte case is the same as non-UTF8 */
5845 
5846           case OP_ANYBYTE:
5847           c = max - min;
5848           if (c > (uint32_t)(mb->end_subject - eptr))
5849             {
5850             eptr = mb->end_subject;
5851             SCHECK_PARTIAL();
5852             }
5853           else eptr += c;
5854           break;
5855 
5856           case OP_ANYNL:
5857           for (i = min; i < max; i++)
5858             {
5859             int len = 1;
5860             if (eptr >= mb->end_subject)
5861               {
5862               SCHECK_PARTIAL();
5863               break;
5864               }
5865             GETCHARLEN(c, eptr, len);
5866             if (c == CHAR_CR)
5867               {
5868               if (++eptr >= mb->end_subject) break;
5869               if (UCHAR21(eptr) == CHAR_LF) eptr++;
5870               }
5871             else
5872               {
5873               if (c != CHAR_LF &&
5874                   (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
5875                    (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
5876 #ifndef EBCDIC
5877                     && c != 0x2028 && c != 0x2029
5878 #endif  /* Not EBCDIC */
5879                     )))
5880                 break;
5881               eptr += len;
5882               }
5883             }
5884           break;
5885 
5886           case OP_NOT_HSPACE:
5887           case OP_HSPACE:
5888           for (i = min; i < max; i++)
5889             {
5890             BOOL gotspace;
5891             int len = 1;
5892             if (eptr >= mb->end_subject)
5893               {
5894               SCHECK_PARTIAL();
5895               break;
5896               }
5897             GETCHARLEN(c, eptr, len);
5898             switch(c)
5899               {
5900               HSPACE_CASES: gotspace = TRUE; break;
5901               default: gotspace = FALSE; break;
5902               }
5903             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5904             eptr += len;
5905             }
5906           break;
5907 
5908           case OP_NOT_VSPACE:
5909           case OP_VSPACE:
5910           for (i = min; i < max; i++)
5911             {
5912             BOOL gotspace;
5913             int len = 1;
5914             if (eptr >= mb->end_subject)
5915               {
5916               SCHECK_PARTIAL();
5917               break;
5918               }
5919             GETCHARLEN(c, eptr, len);
5920             switch(c)
5921               {
5922               VSPACE_CASES: gotspace = TRUE; break;
5923               default: gotspace = FALSE; break;
5924               }
5925             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5926             eptr += len;
5927             }
5928           break;
5929 
5930           case OP_NOT_DIGIT:
5931           for (i = min; i < max; i++)
5932             {
5933             int len = 1;
5934             if (eptr >= mb->end_subject)
5935               {
5936               SCHECK_PARTIAL();
5937               break;
5938               }
5939             GETCHARLEN(c, eptr, len);
5940             if (c < 256 && (mb->ctypes[c] & ctype_digit) != 0) break;
5941             eptr+= len;
5942             }
5943           break;
5944 
5945           case OP_DIGIT:
5946           for (i = min; i < max; i++)
5947             {
5948             int len = 1;
5949             if (eptr >= mb->end_subject)
5950               {
5951               SCHECK_PARTIAL();
5952               break;
5953               }
5954             GETCHARLEN(c, eptr, len);
5955             if (c >= 256 ||(mb->ctypes[c] & ctype_digit) == 0) break;
5956             eptr+= len;
5957             }
5958           break;
5959 
5960           case OP_NOT_WHITESPACE:
5961           for (i = min; i < max; i++)
5962             {
5963             int len = 1;
5964             if (eptr >= mb->end_subject)
5965               {
5966               SCHECK_PARTIAL();
5967               break;
5968               }
5969             GETCHARLEN(c, eptr, len);
5970             if (c < 256 && (mb->ctypes[c] & ctype_space) != 0) break;
5971             eptr+= len;
5972             }
5973           break;
5974 
5975           case OP_WHITESPACE:
5976           for (i = min; i < max; i++)
5977             {
5978             int len = 1;
5979             if (eptr >= mb->end_subject)
5980               {
5981               SCHECK_PARTIAL();
5982               break;
5983               }
5984             GETCHARLEN(c, eptr, len);
5985             if (c >= 256 ||(mb->ctypes[c] & ctype_space) == 0) break;
5986             eptr+= len;
5987             }
5988           break;
5989 
5990           case OP_NOT_WORDCHAR:
5991           for (i = min; i < max; i++)
5992             {
5993             int len = 1;
5994             if (eptr >= mb->end_subject)
5995               {
5996               SCHECK_PARTIAL();
5997               break;
5998               }
5999             GETCHARLEN(c, eptr, len);
6000             if (c < 256 && (mb->ctypes[c] & ctype_word) != 0) break;
6001             eptr+= len;
6002             }
6003           break;
6004 
6005           case OP_WORDCHAR:
6006           for (i = min; i < max; i++)
6007             {
6008             int len = 1;
6009             if (eptr >= mb->end_subject)
6010               {
6011               SCHECK_PARTIAL();
6012               break;
6013               }
6014             GETCHARLEN(c, eptr, len);
6015             if (c >= 256 || (mb->ctypes[c] & ctype_word) == 0) break;
6016             eptr+= len;
6017             }
6018           break;
6019 
6020           default:
6021           RRETURN(PCRE2_ERROR_INTERNAL);
6022           }
6023 
6024         if (possessive) continue;    /* No backtracking */
6025 
6026         /* After \C in UTF mode, pp might be in the middle of a Unicode
6027         character. Use <= pp to ensure backtracking doesn't go too far. */
6028 
6029         for(;;)
6030           {
6031           if (eptr <= pp) goto TAIL_RECURSE;
6032           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM46);
6033           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6034           eptr--;
6035           BACKCHAR(eptr);
6036           if (ctype == OP_ANYNL && eptr > pp  && UCHAR21(eptr) == CHAR_NL &&
6037               UCHAR21(eptr - 1) == CHAR_CR) eptr--;
6038           }
6039         }
6040       else
6041 #endif  /* SUPPORT_UNICODE */
6042       /* Not UTF mode */
6043         {
6044         switch(ctype)
6045           {
6046           case OP_ANY:
6047           for (i = min; i < max; i++)
6048             {
6049             if (eptr >= mb->end_subject)
6050               {
6051               SCHECK_PARTIAL();
6052               break;
6053               }
6054             if (IS_NEWLINE(eptr)) break;
6055             if (mb->partial != 0 &&    /* Take care with CRLF partial */
6056                 eptr + 1 >= mb->end_subject &&
6057                 NLBLOCK->nltype == NLTYPE_FIXED &&
6058                 NLBLOCK->nllen == 2 &&
6059                 *eptr == NLBLOCK->nl[0])
6060               {
6061               mb->hitend = TRUE;
6062               if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
6063               }
6064             eptr++;
6065             }
6066           break;
6067 
6068           case OP_ALLANY:
6069           case OP_ANYBYTE:
6070           c = max - min;
6071           if (c > (uint32_t)(mb->end_subject - eptr))
6072             {
6073             eptr = mb->end_subject;
6074             SCHECK_PARTIAL();
6075             }
6076           else eptr += c;
6077           break;
6078 
6079           case OP_ANYNL:
6080           for (i = min; i < max; i++)
6081             {
6082             if (eptr >= mb->end_subject)
6083               {
6084               SCHECK_PARTIAL();
6085               break;
6086               }
6087             c = *eptr;
6088             if (c == CHAR_CR)
6089               {
6090               if (++eptr >= mb->end_subject) break;
6091               if (*eptr == CHAR_LF) eptr++;
6092               }
6093             else
6094               {
6095               if (c != CHAR_LF && (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
6096                  (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
6097 #if PCRE2_CODE_UNIT_WIDTH != 8
6098                  && c != 0x2028 && c != 0x2029
6099 #endif
6100                  ))) break;
6101               eptr++;
6102               }
6103             }
6104           break;
6105 
6106           case OP_NOT_HSPACE:
6107           for (i = min; i < max; i++)
6108             {
6109             if (eptr >= mb->end_subject)
6110               {
6111               SCHECK_PARTIAL();
6112               break;
6113               }
6114             switch(*eptr)
6115               {
6116               default: eptr++; break;
6117               HSPACE_BYTE_CASES:
6118 #if PCRE2_CODE_UNIT_WIDTH != 8
6119               HSPACE_MULTIBYTE_CASES:
6120 #endif
6121               goto ENDLOOP00;
6122               }
6123             }
6124           ENDLOOP00:
6125           break;
6126 
6127           case OP_HSPACE:
6128           for (i = min; i < max; i++)
6129             {
6130             if (eptr >= mb->end_subject)
6131               {
6132               SCHECK_PARTIAL();
6133               break;
6134               }
6135             switch(*eptr)
6136               {
6137               default: goto ENDLOOP01;
6138               HSPACE_BYTE_CASES:
6139 #if PCRE2_CODE_UNIT_WIDTH != 8
6140               HSPACE_MULTIBYTE_CASES:
6141 #endif
6142               eptr++; break;
6143               }
6144             }
6145           ENDLOOP01:
6146           break;
6147 
6148           case OP_NOT_VSPACE:
6149           for (i = min; i < max; i++)
6150             {
6151             if (eptr >= mb->end_subject)
6152               {
6153               SCHECK_PARTIAL();
6154               break;
6155               }
6156             switch(*eptr)
6157               {
6158               default: eptr++; break;
6159               VSPACE_BYTE_CASES:
6160 #if PCRE2_CODE_UNIT_WIDTH != 8
6161               VSPACE_MULTIBYTE_CASES:
6162 #endif
6163               goto ENDLOOP02;
6164               }
6165             }
6166           ENDLOOP02:
6167           break;
6168 
6169           case OP_VSPACE:
6170           for (i = min; i < max; i++)
6171             {
6172             if (eptr >= mb->end_subject)
6173               {
6174               SCHECK_PARTIAL();
6175               break;
6176               }
6177             switch(*eptr)
6178               {
6179               default: goto ENDLOOP03;
6180               VSPACE_BYTE_CASES:
6181 #if PCRE2_CODE_UNIT_WIDTH != 8
6182               VSPACE_MULTIBYTE_CASES:
6183 #endif
6184               eptr++; break;
6185               }
6186             }
6187           ENDLOOP03:
6188           break;
6189 
6190           case OP_NOT_DIGIT:
6191           for (i = min; i < max; i++)
6192             {
6193             if (eptr >= mb->end_subject)
6194               {
6195               SCHECK_PARTIAL();
6196               break;
6197               }
6198             if (MAX_255(*eptr) && (mb->ctypes[*eptr] & ctype_digit) != 0) break;
6199             eptr++;
6200             }
6201           break;
6202 
6203           case OP_DIGIT:
6204           for (i = min; i < max; i++)
6205             {
6206             if (eptr >= mb->end_subject)
6207               {
6208               SCHECK_PARTIAL();
6209               break;
6210               }
6211             if (!MAX_255(*eptr) || (mb->ctypes[*eptr] & ctype_digit) == 0) break;
6212             eptr++;
6213             }
6214           break;
6215 
6216           case OP_NOT_WHITESPACE:
6217           for (i = min; i < max; i++)
6218             {
6219             if (eptr >= mb->end_subject)
6220               {
6221               SCHECK_PARTIAL();
6222               break;
6223               }
6224             if (MAX_255(*eptr) && (mb->ctypes[*eptr] & ctype_space) != 0) break;
6225             eptr++;
6226             }
6227           break;
6228 
6229           case OP_WHITESPACE:
6230           for (i = min; i < max; i++)
6231             {
6232             if (eptr >= mb->end_subject)
6233               {
6234               SCHECK_PARTIAL();
6235               break;
6236               }
6237             if (!MAX_255(*eptr) || (mb->ctypes[*eptr] & ctype_space) == 0) break;
6238             eptr++;
6239             }
6240           break;
6241 
6242           case OP_NOT_WORDCHAR:
6243           for (i = min; i < max; i++)
6244             {
6245             if (eptr >= mb->end_subject)
6246               {
6247               SCHECK_PARTIAL();
6248               break;
6249               }
6250             if (MAX_255(*eptr) && (mb->ctypes[*eptr] & ctype_word) != 0) break;
6251             eptr++;
6252             }
6253           break;
6254 
6255           case OP_WORDCHAR:
6256           for (i = min; i < max; i++)
6257             {
6258             if (eptr >= mb->end_subject)
6259               {
6260               SCHECK_PARTIAL();
6261               break;
6262               }
6263             if (!MAX_255(*eptr) || (mb->ctypes[*eptr] & ctype_word) == 0) break;
6264             eptr++;
6265             }
6266           break;
6267 
6268           default:
6269           RRETURN(PCRE2_ERROR_INTERNAL);
6270           }
6271 
6272         if (possessive) continue;    /* No backtracking */
6273         for (;;)
6274           {
6275           if (eptr == pp) goto TAIL_RECURSE;
6276           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM47);
6277           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6278           eptr--;
6279           if (ctype == OP_ANYNL && eptr > pp  && *eptr == CHAR_LF &&
6280               eptr[-1] == CHAR_CR) eptr--;
6281           }
6282         }
6283 
6284       /* Control never gets here */
6285       }
6286 
6287     /* There's been some horrible disaster. Arrival here can only mean there is
6288     something seriously wrong in the code above or the OP_xxx definitions. */
6289 
6290     default:
6291     RRETURN(PCRE2_ERROR_INTERNAL);
6292     }
6293 
6294   /* Do not stick any code in here without much thought; it is assumed
6295   that "continue" in the code above comes out to here to repeat the main
6296   loop. */
6297 
6298   }             /* End of main loop */
6299 /* Control never reaches here */
6300 
6301 
6302 /* When compiling to use the heap rather than the stack for recursive calls to
6303 match(), the RRETURN() macro jumps here. The number that is saved in
6304 frame->Xwhere indicates which label we actually want to return to. */
6305 
6306 #ifdef HEAP_MATCH_RECURSE
6307 #define LBL(val) case val: goto L_RM##val;
6308 HEAP_RETURN:
6309 switch (frame->Xwhere)
6310   {
6311   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6312   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
6313   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
6314   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
6315   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
6316   LBL(65) LBL(66) LBL(68)
6317 #ifdef SUPPORT_WIDE_CHARS
6318   LBL(20) LBL(21)
6319 #endif
6320 #ifdef SUPPORT_UNICODE
6321   LBL(16) LBL(18)
6322   LBL(22) LBL(23) LBL(28) LBL(30)
6323   LBL(32) LBL(34) LBL(42) LBL(46)
6324   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6325   LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
6326 #endif  /* SUPPORT_UNICODE */
6327   default:
6328   return PCRE2_ERROR_INTERNAL;
6329   }
6330 #undef LBL
6331 #endif  /* HEAP_MATCH_RECURSE */
6332 }
6333 
6334 
6335 /***************************************************************************
6336 ****************************************************************************
6337                    RECURSION IN THE match() FUNCTION
6338 
6339 Undefine all the macros that were defined above to handle this. */
6340 /* The names in the conditional below are undefined only when HEAP_MATCH_RECURSE is defined; fc and fi follow it separately. */
6341 #ifdef HEAP_MATCH_RECURSE
6342 #undef eptr
6343 #undef ecode
6344 #undef mstart
6345 #undef offset_top
6346 #undef eptrb
6347 #undef flags
6348 /* This group includes pp, which the repeat code above sets with "pp = eptr" to remember where a run started. */
6349 #undef callpat
6350 #undef charptr
6351 #undef data
6352 #undef next_ecode
6353 #undef pp
6354 #undef prev
6355 #undef saved_eptr
6356 
6357 #undef new_recursive
6358 
6359 #undef cur_is_word
6360 #undef condition
6361 #undef prev_is_word
6362 /* This group includes ctype, max and min, which drive the character-type repeat loops above. */
6363 #undef ctype
6364 #undef length
6365 #undef max
6366 #undef min
6367 #undef number
6368 #undef offset
6369 #undef op
6370 #undef save_capture_last
6371 #undef save_offset1
6372 #undef save_offset2
6373 #undef save_offset3
6374 /* NOTE(review): the blank-line grouping presumably mirrors the groups of the matching #defines earlier in the file - confirm there. */
6375 #undef newptrb
6376 #endif  /* HEAP_MATCH_RECURSE */
6377 
6378 /* fc and fi are defined as macros in both cases (with or without HEAP_MATCH_RECURSE), so they are undefined outside the conditional */
6379 
6380 #undef fc
6381 #undef fi
6382 
6383 /***************************************************************************
6384 ***************************************************************************/
6385 
6386 
6387 #ifdef HEAP_MATCH_RECURSE
6388 /*************************************************
6389 *          Release allocated heap frames         *
6390 *************************************************/
6391 
6392 /* This function releases all the allocated frames. The base frame is on the
6393 machine stack, and so must not be freed.
6394 
6395 Argument:
6396   frame_base    the address of the base frame
6397   mb            the match block
6398 
6399 Returns:  nothing
6400 */
6401 
6402 static void
release_match_heapframes(heapframe * frame_base,match_block * mb)6403 release_match_heapframes (heapframe *frame_base, match_block *mb)
6404 {
6405 heapframe *nextframe = frame_base->Xnextframe;
6406 while (nextframe != NULL)
6407   {
6408   heapframe *oldframe = nextframe;
6409   nextframe = nextframe->Xnextframe;
6410   mb->stack_memctl.free(oldframe, mb->stack_memctl.memory_data);
6411   }
6412 }
6413 #endif  /* HEAP_MATCH_RECURSE */
6414 
6415 
6416 
6417 /*************************************************
6418 *           Match a Regular Expression           *
6419 *************************************************/
6420 
6421 /* This function applies a compiled pattern to a subject string and picks out
6422 portions of the string if it matches. Two elements in the vector are set for
6423 each substring: the offsets to the start and end of the substring.
6424 
6425 Arguments:
6426   code            points to the compiled expression
6427   subject         points to the subject string
6428   length          length of subject string (may contain binary zeros)
6429   start_offset    where to start in the subject string
6430   options         option bits
6431   match_data      points to a match_data block
6432   mcontext        points to a PCRE2 match context (NULL selects the default)
6433 
6434 Returns:          > 0 => success; value is the number of ovector pairs filled
6435                   = 0 => success, but ovector is not big enough
6436                    -1 => failed to match (PCRE2_ERROR_NOMATCH)
6437                    -2 => partial match (PCRE2_ERROR_PARTIAL)
6438                  < -2 => some kind of unexpected problem
6439 */
6440 
6441 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_match(const pcre2_code * code,PCRE2_SPTR subject,PCRE2_SIZE length,PCRE2_SIZE start_offset,uint32_t options,pcre2_match_data * match_data,pcre2_match_context * mcontext)6442 pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
6443   PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
6444   pcre2_match_context *mcontext)
6445 {
6446 int rc;
6447 int ocount;
6448 
6449 const uint8_t *start_bits = NULL;
6450 
6451 const pcre2_real_code *re = (const pcre2_real_code *)code;
6452 
6453 BOOL anchored;
6454 BOOL firstline;
6455 BOOL has_first_cu = FALSE;
6456 BOOL has_req_cu = FALSE;
6457 BOOL startline;
6458 BOOL using_temporary_offsets = FALSE;
6459 BOOL utf;
6460 
6461 PCRE2_UCHAR first_cu = 0;
6462 PCRE2_UCHAR first_cu2 = 0;
6463 PCRE2_UCHAR req_cu = 0;
6464 PCRE2_UCHAR req_cu2 = 0;
6465 
6466 PCRE2_SPTR bumpalong_limit;
6467 PCRE2_SPTR end_subject;
6468 PCRE2_SPTR start_match = subject + start_offset;
6469 PCRE2_SPTR req_cu_ptr = start_match - 1;
6470 PCRE2_SPTR start_partial = NULL;
6471 PCRE2_SPTR match_partial = NULL;
6472 
6473 /* We need to have mb pointing to a match block, because the IS_NEWLINE macro
6474 is used below, and it expects NLBLOCK to be defined as a pointer. */
6475 
6476 match_block actual_match_block;
6477 match_block *mb = &actual_match_block;
6478 
6479 #ifdef HEAP_MATCH_RECURSE
6480 heapframe frame_zero;
6481 frame_zero.Xprevframe = NULL;            /* Marks the top level */
6482 frame_zero.Xnextframe = NULL;            /* None are allocated yet */
6483 mb->match_frames_base = &frame_zero;
6484 #endif
6485 
6486 /* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
6487 subject string. */
6488 
6489 if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
6490 end_subject = subject + length;
6491 
6492 /* Plausibility checks */
6493 
6494 if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
6495 if (code == NULL || subject == NULL || match_data == NULL)
6496   return PCRE2_ERROR_NULL;
6497 if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
6498 
6499 /* Check that the first field in the block is the magic number. */
6500 
6501 if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
6502 
6503 /* Check the code unit width. */
6504 
6505 if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
6506   return PCRE2_ERROR_BADMODE;
6507 
6508 /* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
6509 options variable for this function. Users of PCRE2 who are not calling the
6510 function directly would like to have a way of setting these flags, in the same
6511 way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
6512 constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
6513 (*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field which can now be
6514 transferred to the options for this function. The bits are guaranteed to be
6515 adjacent, but do not have the same values. This bit of Boolean trickery assumes
6516 that the match-time bits are not more significant than the flag bits. If by
6517 accident this is not the case, a compile-time division by zero error will
6518 occur. */
6519 
6520 #define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
6521 #define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
6522 options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
6523 #undef FF
6524 #undef OO
6525 
6526 /* A NULL match context means "use a default context" */
6527 
6528 if (mcontext == NULL)
6529   mcontext = (pcre2_match_context *)(&PRIV(default_match_context));
6530 
6531 /* These two settings are used in the code for checking a UTF string that
6532 follows immediately afterwards. Other values in the mb block are used only
6533 during interpretive pcre2_match() processing, not when the JIT support is in
6534 use, so they are set up later. */
6535 
6536 utf = (re->overall_options & PCRE2_UTF) != 0;
6537 mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
6538               ((options & PCRE2_PARTIAL_SOFT) != 0)? 1 : 0;
6539 
6540 /* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
6541 we must also check that a starting offset does not point into the middle of a
6542 multiunit character. We check only the portion of the subject that is going to
6543 be inspected during matching - from the offset minus the maximum lookbehind
6544 to the given length. This saves time when a small part of a large subject is
6545 being matched by the use of a starting offset. Note that the maximum lookbehind
6546 is a number of characters, not code units. */
6547 
6548 #ifdef SUPPORT_UNICODE
6549 if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
6550   {
6551   PCRE2_SPTR check_subject = start_match;  /* start_match includes offset */
6552 
6553   if (start_offset > 0)
6554     {
6555 #if PCRE2_CODE_UNIT_WIDTH != 32
6556     unsigned int i;
6557     if (start_match < end_subject && NOT_FIRSTCU(*start_match))
6558       return PCRE2_ERROR_BADUTFOFFSET;
6559     for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--)
6560       {
6561       check_subject--;
6562       while (check_subject > subject &&
6563 #if PCRE2_CODE_UNIT_WIDTH == 8
6564       (*check_subject & 0xc0) == 0x80)
6565 #else  /* 16-bit */
6566       (*check_subject & 0xfc00) == 0xdc00)
6567 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
6568         check_subject--;
6569       }
6570 #else
6571     /* In the 32-bit library, one code unit equals one character. However,
6572     we cannot just subtract the lookbehind and then compare pointers, because
6573     a very large lookbehind could create an invalid pointer. */
6574 
6575     if (start_offset >= re->max_lookbehind)
6576       check_subject -= re->max_lookbehind;
6577     else
6578       check_subject = subject;
6579 #endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6580     }
6581 
6582   /* Validate the relevant portion of the subject. After an error, adjust the
6583   offset to be an absolute offset in the whole string. */
6584 
6585   match_data->rc = PRIV(valid_utf)(check_subject,
6586     length - (check_subject - subject), &(match_data->startchar));
6587   if (match_data->rc != 0)
6588     {
6589     match_data->startchar += check_subject - subject;
6590     return match_data->rc;
6591     }
6592   }
6593 #endif  /* SUPPORT_UNICODE */
6594 
6595 /* It is an error to set an offset limit without setting the flag at compile
6596 time. */
6597 
6598 if (mcontext->offset_limit != PCRE2_UNSET &&
6599      (re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
6600   return PCRE2_ERROR_BADOFFSETLIMIT;
6601 
6602 /* If the pattern was successfully studied with JIT support, run the JIT
6603 executable instead of the rest of this function. Most options must be set at
6604 compile time for the JIT code to be usable. Fallback to the normal code path if
6605 an unsupported option is set or if JIT returns BADOPTION (which means that the
6606 selected normal or partial matching mode was not compiled). */
6607 
6608 #ifdef SUPPORT_JIT
6609 if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0)
6610   {
6611   rc = pcre2_jit_match(code, subject, length, start_offset, options,
6612     match_data, mcontext);
6613   if (rc != PCRE2_ERROR_JIT_BADOPTION) return rc;
6614   }
6615 #endif
6616 
6617 /* Carry on with non-JIT matching. */
6618 
6619 anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0;
6620 firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
6621 startline = (re->flags & PCRE2_STARTLINE) != 0;
6622 bumpalong_limit =  (mcontext->offset_limit == PCRE2_UNSET)?
6623   end_subject : subject + mcontext->offset_limit;
6624 
6625 /* Fill in the fields in the match block. */
6626 
6627 mb->callout = mcontext->callout;
6628 mb->callout_data = mcontext->callout_data;
6629 mb->memctl = mcontext->memctl;
6630 #ifdef HEAP_MATCH_RECURSE
6631 mb->stack_memctl = mcontext->stack_memctl;
6632 #endif
6633 
6634 mb->start_subject = subject;
6635 mb->start_offset = start_offset;
6636 mb->end_subject = end_subject;
6637 mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
6638 
6639 mb->moptions = options;                 /* Match options */
6640 mb->poptions = re->overall_options;     /* Pattern options */
6641 
6642 mb->ignore_skip_arg = 0;
6643 mb->mark = mb->nomatch_mark = NULL;     /* In case never set */
6644 mb->recursive = NULL;                   /* No recursion at top level */
6645 mb->ovecsave_chain = NULL;              /* No ovecsave blocks yet */
6646 mb->hitend = FALSE;
6647 
6648 /* The name table is needed for finding all the numbers associated with a
6649 given name, for condition testing. The code follows the name table. */
6650 
6651 mb->name_table = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code));
6652 mb->name_count = re->name_count;
6653 mb->name_entry_size = re->name_entry_size;
6654 mb->start_code = mb->name_table + re->name_count * re->name_entry_size;
6655 
6656 /* Limits set in the pattern override the match context only if they are
6657 smaller. */
6658 
6659 mb->match_limit = (mcontext->match_limit < re->limit_match)?
6660                   mcontext->match_limit : re->limit_match;
6661 mb->match_limit_recursion = (mcontext->recursion_limit < re->limit_recursion)?
6662                             mcontext->recursion_limit : re->limit_recursion;
6663 
6664 /* Pointers to the individual character tables */
6665 
6666 mb->lcc = re->tables + lcc_offset;
6667 mb->fcc = re->tables + fcc_offset;
6668 mb->ctypes = re->tables + ctypes_offset;
6669 
6670 /* Process the \R and newline settings. */
6671 
6672 mb->bsr_convention = re->bsr_convention;
6673 mb->nltype = NLTYPE_FIXED;
6674 switch(re->newline_convention)
6675   {
6676   case PCRE2_NEWLINE_CR:
6677   mb->nllen = 1;
6678   mb->nl[0] = CHAR_CR;
6679   break;
6680 
6681   case PCRE2_NEWLINE_LF:
6682   mb->nllen = 1;
6683   mb->nl[0] = CHAR_NL;
6684   break;
6685 
6686   case PCRE2_NEWLINE_CRLF:
6687   mb->nllen = 2;
6688   mb->nl[0] = CHAR_CR;
6689   mb->nl[1] = CHAR_NL;
6690   break;
6691 
6692   case PCRE2_NEWLINE_ANY:
6693   mb->nltype = NLTYPE_ANY;
6694   break;
6695 
6696   case PCRE2_NEWLINE_ANYCRLF:
6697   mb->nltype = NLTYPE_ANYCRLF;
6698   break;
6699 
6700   default: return PCRE2_ERROR_INTERNAL;
6701   }
6702 
6703 /* If the expression has got more back references than the offsets supplied can
6704 hold, we get a temporary chunk of memory to use during the matching. Otherwise,
6705 we can use the vector supplied. The size of the ovector is three times the
6706 value in the oveccount field. Two-thirds of it is pairs for storing matching
6707 offsets, and the top third is working space. */
6708 
6709 if (re->top_backref >= match_data->oveccount)
6710   {
6711   ocount = re->top_backref * 3 + 3;
6712   mb->ovector = (PCRE2_SIZE *)(mb->memctl.malloc(ocount * sizeof(PCRE2_SIZE),
6713     mb->memctl.memory_data));
6714   if (mb->ovector == NULL) return PCRE2_ERROR_NOMEMORY;
6715   using_temporary_offsets = TRUE;
6716   }
6717 else
6718   {
6719   ocount = 3 * match_data->oveccount;
6720   mb->ovector = match_data->ovector;
6721   }
6722 
6723 mb->offset_end = ocount;
6724 mb->offset_max = (2*ocount)/3;
6725 
6726 /* Reset the working variable associated with each extraction. These should
6727 never be used unless previously set, but they get saved and restored, and so we
6728 initialize them to avoid reading uninitialized locations. Also, unset the
6729 offsets for the matched string. This is really just for tidiness with callouts,
6730 in case they inspect these fields. */
6731 
6732 if (ocount > 0)
6733   {
6734   register PCRE2_SIZE *iptr = mb->ovector + ocount;
6735   register PCRE2_SIZE *iend = iptr - re->top_bracket;
6736   if (iend < mb->ovector + 2) iend = mb->ovector + 2;
6737   while (--iptr >= iend) *iptr = PCRE2_UNSET;
6738   mb->ovector[0] = mb->ovector[1] = PCRE2_UNSET;
6739   }
6740 
6741 /* Set up the first code unit to match, if available. The first_codeunit value
6742 is never set for an anchored regular expression, but the anchoring may be
6743 forced at run time, so we have to test for anchoring. The first code unit may
6744 be unset for an unanchored pattern, of course. If there's no first code unit
6745 there may be a bitmap of possible first characters. */
6746 
6747 if (!anchored)
6748   {
6749   if ((re->flags & PCRE2_FIRSTSET) != 0)
6750     {
6751     has_first_cu = TRUE;
6752     first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
6753     if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
6754       {
6755       first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
6756 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
6757       if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu);
6758 #endif
6759       }
6760     }
6761   else
6762     if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
6763       start_bits = re->start_bitmap;
6764   }
6765 
6766 /* For anchored or unanchored matches, there may be a "last known required
6767 character" set. */
6768 
6769 if ((re->flags & PCRE2_LASTSET) != 0)
6770   {
6771   has_req_cu = TRUE;
6772   req_cu = req_cu2 = (PCRE2_UCHAR)(re->last_codeunit);
6773   if ((re->flags & PCRE2_LASTCASELESS) != 0)
6774     {
6775     req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu);
6776 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
6777     if (utf && req_cu > 127) req_cu2 = UCD_OTHERCASE(req_cu);
6778 #endif
6779     }
6780   }
6781 
6782 
6783 /* ==========================================================================*/
6784 
6785 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
6786 the loop runs just once. */
6787 
6788 for(;;)
6789   {
6790   PCRE2_SPTR new_start_match;
6791   mb->capture_last = 0;
6792 
6793   /* ----------------- Start of match optimizations ---------------- */
6794 
6795   /* There are some optimizations that avoid running the match if a known
6796   starting point is not found, or if a known later code unit is not present.
6797   However, there is an option (settable at compile time) that disables these,
6798   for testing and for ensuring that all callouts do actually occur. */
6799 
6800   if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
6801     {
6802     PCRE2_SPTR save_end_subject = end_subject;
6803 
6804     /* If firstline is TRUE, the start of the match is constrained to the first
6805     line of a multiline string. That is, the match must be before or at the
6806     first newline. Implement this by temporarily adjusting end_subject so that
6807     we stop the optimization scans at a newline. If the match fails at the
6808     newline, later code breaks this loop. */
6809 
6810     if (firstline)
6811       {
6812       PCRE2_SPTR t = start_match;
6813 #ifdef SUPPORT_UNICODE
6814       if (utf)
6815         {
6816         while (t < mb->end_subject && !IS_NEWLINE(t))
6817           {
6818           t++;
6819           ACROSSCHAR(t < end_subject, *t, t++);
6820           }
6821         }
6822       else
6823 #endif
6824       while (t < mb->end_subject && !IS_NEWLINE(t)) t++;
6825       end_subject = t;
6826       }
6827 
6828     /* Advance to a unique first code unit if there is one. In 8-bit mode, the
6829     use of memchr() gives a big speed up. */
6830 
6831     if (has_first_cu)
6832       {
6833       PCRE2_UCHAR smc;
6834       if (first_cu != first_cu2)
6835         while (start_match < end_subject &&
6836           (smc = UCHAR21TEST(start_match)) != first_cu && smc != first_cu2)
6837           start_match++;
6838       else
6839         {
6840 #if PCRE2_CODE_UNIT_WIDTH != 8
6841         while (start_match < end_subject && UCHAR21TEST(start_match) != first_cu)
6842           start_match++;
6843 #else
6844         start_match = memchr(start_match, first_cu, end_subject - start_match);
6845         if (start_match == NULL) start_match = end_subject;
6846 #endif
6847         }
6848       }
6849 
6850     /* Or to just after a linebreak for a multiline match */
6851 
6852     else if (startline)
6853       {
6854       if (start_match > mb->start_subject + start_offset)
6855         {
6856 #ifdef SUPPORT_UNICODE
6857         if (utf)
6858           {
6859           while (start_match < end_subject && !WAS_NEWLINE(start_match))
6860             {
6861             start_match++;
6862             ACROSSCHAR(start_match < end_subject, *start_match,
6863               start_match++);
6864             }
6865           }
6866         else
6867 #endif
6868         while (start_match < end_subject && !WAS_NEWLINE(start_match))
6869           start_match++;
6870 
6871         /* If we have just passed a CR and the newline option is ANY or
6872         ANYCRLF, and we are now at a LF, advance the match position by one more
6873         code unit. */
6874 
6875         if (start_match[-1] == CHAR_CR &&
6876              (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
6877              start_match < end_subject &&
6878              UCHAR21TEST(start_match) == CHAR_NL)
6879           start_match++;
6880         }
6881       }
6882 
6883     /* Or to a non-unique first code unit if any have been identified. The
6884     bitmap contains only 256 bits. When code units are 16 or 32 bits wide, all
6885     code units greater than 254 set the 255 bit. */
6886 
6887     else if (start_bits != NULL)
6888       {
6889       while (start_match < end_subject)
6890         {
6891         register uint32_t c = UCHAR21TEST(start_match);
6892 #if PCRE2_CODE_UNIT_WIDTH != 8
6893         if (c > 255) c = 255;
6894 #endif
6895         if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
6896         start_match++;
6897         }
6898       }
6899 
6900     /* Restore fudged end_subject */
6901 
6902     end_subject = save_end_subject;
6903 
6904     /* The following two optimizations are disabled for partial matching. */
6905 
6906     if (!mb->partial)
6907       {
6908       /* The minimum matching length is a lower bound; no actual string of that
6909       length may actually match the pattern. Although the value is, strictly,
6910       in characters, we treat it as code units to avoid spending too much time
6911       in this optimization. */
6912 
6913       if (end_subject - start_match < re->minlength)
6914         {
6915         rc = MATCH_NOMATCH;
6916         break;
6917         }
6918 
6919       /* If req_cu is set, we know that that code unit must appear in the
6920       subject for the match to succeed. If the first code unit is set, req_cu
6921       must be later in the subject; otherwise the test starts at the match
6922       point. This optimization can save a huge amount of backtracking in
6923       patterns with nested unlimited repeats that aren't going to match.
6924       Writing separate code for cased/caseless versions makes it go faster, as
6925       does using an autoincrement and backing off on a match.
6926 
6927       HOWEVER: when the subject string is very, very long, searching to its end
6928       can take a long time, and give bad performance on quite ordinary
6929       patterns. This showed up when somebody was matching something like
6930       /^\d+C/ on a 32-megabyte string... so we don't do this when the string is
6931       sufficiently long. */
6932 
6933       if (has_req_cu && end_subject - start_match < REQ_CU_MAX)
6934         {
6935         register PCRE2_SPTR p = start_match + (has_first_cu? 1:0);
6936 
6937         /* We don't need to repeat the search if we haven't yet reached the
6938         place we found it at last time. */
6939 
6940         if (p > req_cu_ptr)
6941           {
6942           if (req_cu != req_cu2)
6943             {
6944             while (p < end_subject)
6945               {
6946               register uint32_t pp = UCHAR21INCTEST(p);
6947               if (pp == req_cu || pp == req_cu2) { p--; break; }
6948               }
6949             }
6950           else
6951             {
6952             while (p < end_subject)
6953               {
6954               if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
6955               }
6956             }
6957 
6958           /* If we can't find the required code unit, break the matching loop,
6959           forcing a match failure. */
6960 
6961           if (p >= end_subject)
6962             {
6963             rc = MATCH_NOMATCH;
6964             break;
6965             }
6966 
6967           /* If we have found the required code unit, save the point where we
6968           found it, so that we don't search again next time round the loop if
6969           the start hasn't passed this code unit yet. */
6970 
6971           req_cu_ptr = p;
6972           }
6973         }
6974       }
6975     }
6976 
6977   /* ------------ End of start of match optimizations ------------ */
6978 
6979   /* Give no match if we have passed the bumpalong limit. */
6980 
6981   if (start_match > bumpalong_limit)
6982     {
6983     rc = MATCH_NOMATCH;
6984     break;
6985     }
6986 
6987   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6988   first starting point for which a partial match was found. */
6989 
6990   mb->start_match_ptr = start_match;
6991   mb->start_used_ptr = start_match;
6992   mb->last_used_ptr = start_match;
6993   mb->match_call_count = 0;
6994   mb->match_function_type = 0;
6995   mb->end_offset_top = 0;
6996   mb->skip_arg_count = 0;
6997   rc = match(start_match, mb->start_code, start_match, 2, mb, NULL, 0);
6998 
6999   if (mb->hitend && start_partial == NULL)
7000     {
7001     start_partial = mb->start_used_ptr;
7002     match_partial = start_match;
7003     }
7004 
7005   switch(rc)
7006     {
7007     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
7008     the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
7009     entirely. The only way we can do that is to re-do the match at the same
7010     point, with a flag to force SKIP with an argument to be ignored. Just
7011     treating this case as NOMATCH does not work because it does not check other
7012     alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
7013 
7014     case MATCH_SKIP_ARG:
7015     new_start_match = start_match;
7016     mb->ignore_skip_arg = mb->skip_arg_count;
7017     break;
7018 
7019     /* SKIP passes back the next starting point explicitly, but if it is no
7020     greater than the match we have just done, treat it as NOMATCH. */
7021 
7022     case MATCH_SKIP:
7023     if (mb->start_match_ptr > start_match)
7024       {
7025       new_start_match = mb->start_match_ptr;
7026       break;
7027       }
7028     /* Fall through */
7029 
7030     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
7031     exactly like PRUNE. Unset ignore SKIP-with-argument. */
7032 
7033     case MATCH_NOMATCH:
7034     case MATCH_PRUNE:
7035     case MATCH_THEN:
7036     mb->ignore_skip_arg = 0;
7037     new_start_match = start_match + 1;
7038 #ifdef SUPPORT_UNICODE
7039     if (utf)
7040       ACROSSCHAR(new_start_match < end_subject, *new_start_match,
7041         new_start_match++);
7042 #endif
7043     break;
7044 
7045     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
7046 
7047     case MATCH_COMMIT:
7048     rc = MATCH_NOMATCH;
7049     goto ENDLOOP;
7050 
7051     /* Any other return is either a match, or some kind of error. */
7052 
7053     default:
7054     goto ENDLOOP;
7055     }
7056 
7057   /* Control reaches here for the various types of "no match at this point"
7058   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
7059 
7060   rc = MATCH_NOMATCH;
7061 
7062   /* If PCRE2_FIRSTLINE is set, the match must happen before or at the first
7063   newline in the subject (though it may continue over the newline). Therefore,
7064   if we have just failed to match, starting at a newline, do not continue. */
7065 
7066   if (firstline && IS_NEWLINE(start_match)) break;
7067 
7068   /* Advance to new matching position */
7069 
7070   start_match = new_start_match;
7071 
7072   /* Break the loop if the pattern is anchored or if we have passed the end of
7073   the subject. */
7074 
7075   if (anchored || start_match > end_subject) break;
7076 
7077   /* If we have just passed a CR and we are now at a LF, and the pattern does
7078   not contain any explicit matches for \r or \n, and the newline option is CRLF
7079   or ANY or ANYCRLF, advance the match position by one more code unit. In
7080 normal matching start_match will always be greater than the first position at
7081   this stage, but a failed *SKIP can cause a return at the same point, which is
7082   why the first test exists. */
7083 
7084   if (start_match > subject + start_offset &&
7085       start_match[-1] == CHAR_CR &&
7086       start_match < end_subject &&
7087       *start_match == CHAR_NL &&
7088       (re->flags & PCRE2_HASCRORLF) == 0 &&
7089         (mb->nltype == NLTYPE_ANY ||
7090          mb->nltype == NLTYPE_ANYCRLF ||
7091          mb->nllen == 2))
7092     start_match++;
7093 
7094   mb->mark = NULL;   /* Reset for start of next match attempt */
7095   }                  /* End of for(;;) "bumpalong" loop */
7096 
7097 /* ==========================================================================*/
7098 
7099 /* When we reach here, one of the stopping conditions is true:
7100 
7101 (1) The match succeeded, either completely, or partially;
7102 
7103 (2) The pattern is anchored or the match was failed by (*COMMIT);
7104 
7105 (3) We are past the end of the subject or the bumpalong limit;
7106 
7107 (4) PCRE2_FIRSTLINE is set and we have failed to match at a newline, because
7108     this option requests that a match occur at or before the first newline in
7109     the subject.
7110 
7111 (5) Some kind of error occurred.
7112 
7113 */
7114 
7115 ENDLOOP:
7116 
7117 #ifdef HEAP_MATCH_RECURSE
7118 release_match_heapframes(&frame_zero, mb);
7119 #endif
7120 
7121 /* Release any frames that were saved from recursions. */
7122 
7123 while (mb->ovecsave_chain != NULL)
7124   {
7125   ovecsave_frame *this = mb->ovecsave_chain;
7126   mb->ovecsave_chain = this->next;
7127   mb->memctl.free(this, mb->memctl.memory_data);
7128   }
7129 
7130 /* Fill in fields that are always returned in the match data. */
7131 
7132 match_data->code = re;
7133 match_data->subject = subject;
7134 match_data->mark = mb->mark;
7135 match_data->matchedby = PCRE2_MATCHEDBY_INTERPRETER;
7136 
7137 /* Handle a fully successful match. */
7138 
7139 if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
7140   {
7141   uint32_t arg_offset_max = 2 * match_data->oveccount;
7142 
7143   /* When the offset vector is big enough to deal with any backreferences,
7144   captured substring offsets will already be set up. In the case where we had
7145   to get some local memory to hold offsets for backreference processing, copy
7146   those that we can. In this case there need not be overflow if certain parts
7147   of the pattern were not used, even though there are more capturing
7148   parentheses than vector slots. */
7149 
7150   if (using_temporary_offsets)
7151     {
7152     if (arg_offset_max >= 4)
7153       {
7154       memcpy(match_data->ovector + 2, mb->ovector + 2,
7155         (arg_offset_max - 2) * sizeof(PCRE2_SIZE));
7156       }
7157     if (mb->end_offset_top > arg_offset_max) mb->capture_last |= OVFLBIT;
7158     mb->memctl.free(mb->ovector, mb->memctl.memory_data);
7159     }
7160 
7161   /* Set the return code to the number of captured strings, or 0 if there were
7162   too many to fit into the ovector. */
7163 
7164   match_data->rc = ((mb->capture_last & OVFLBIT) != 0)?
7165     0 : mb->end_offset_top/2;
7166 
7167   /* If there is space in the offset vector, set any pairs that follow the
7168   highest-numbered captured string but are less than the number of capturing
7169   groups in the pattern (and are within the ovector) to PCRE2_UNSET. It is
7170   documented that this happens. In earlier versions, the whole set of potential
7171   capturing offsets was initialized each time round the loop, but this is
7172   handled differently now. "Gaps" are set to PCRE2_UNSET dynamically instead
7173   (this fixed a bug). Thus, it is only those at the end that need setting here.
7174   We can't just mark them all unset at the start of the whole thing because
7175   they may get set in one branch that is not the final matching branch. */
7176 
7177   if (mb->end_offset_top/2 <= re->top_bracket)
7178     {
7179     register PCRE2_SIZE *iptr, *iend;
7180     int resetcount = re->top_bracket + 1;
7181     if (resetcount > match_data->oveccount) resetcount = match_data->oveccount;
7182     iptr = match_data->ovector + mb->end_offset_top;
7183     iend = match_data->ovector + 2 * resetcount;
7184     while (iptr < iend) *iptr++ = PCRE2_UNSET;
7185     }
7186 
7187   /* If there is space, set up the whole thing as substring 0. The value of
7188   mb->start_match_ptr might have been modified if \K was encountered on the
7189   successful matching path. */
7190 
7191   if (match_data->oveccount < 1) rc = 0; else
7192     {
7193     match_data->ovector[0] = mb->start_match_ptr - mb->start_subject;
7194     match_data->ovector[1] = mb->end_match_ptr - mb->start_subject;
7195     }
7196 
7197   /* Set the remaining returned values */
7198 
7199   match_data->startchar = start_match - subject;
7200   match_data->leftchar = mb->start_used_ptr - subject;
7201   match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
7202     mb->last_used_ptr : mb->end_match_ptr) - subject;
7203   return match_data->rc;
7204   }
7205 
7206 /* Control gets here if there has been a partial match, an error, or if the
7207 overall match attempt has failed at all permitted starting positions. Any mark
7208 data is in the nomatch_mark field. */
7209 
7210 match_data->mark = mb->nomatch_mark;
7211 
7212 /* For anything other than nomatch or partial match, just return the code. */
7213 
7214 if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL)
7215   match_data->rc = rc;
7216 
7217 /* Else handle a partial match. */
7218 
7219 else if (match_partial != NULL)
7220   {
7221   if (match_data->oveccount > 0)
7222     {
7223     match_data->ovector[0] = match_partial - subject;
7224     match_data->ovector[1] = end_subject - subject;
7225     }
7226   match_data->startchar = match_partial - subject;
7227   match_data->leftchar = start_partial - subject;
7228   match_data->rightchar = end_subject - subject;
7229   match_data->rc = PCRE2_ERROR_PARTIAL;
7230   }
7231 
7232 /* Else this is the classic nomatch case. */
7233 
7234 else match_data->rc = PCRE2_ERROR_NOMATCH;
7235 
7236 /* Free any temporary offsets. */
7237 
7238 if (using_temporary_offsets)
7239   mb->memctl.free(mb->ovector, mb->memctl.memory_data);
7240 return match_data->rc;
7241 }
7242 
7243 /* End of pcre2_match.c */
7244