• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10           New API code Copyright (c) 2016-2019 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45 
46 #include "pcre2_internal.h"
47 
/* Capacity, counted in saved pointers, of the stack that pcre2_substitute()
uses while reprocessing nested replacement texts. Each nesting level pushes two
entries: the pointer to resume at and the end pointer to restore. */
48 #define PTR_STACK_SIZE 20
49 
/* The option bits that belong to pcre2_substitute() itself. They are saved
separately and masked out of the options that are passed on to pcre2_match(). */
50 #define SUBSTITUTE_OPTIONS \
51   (PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \
52    PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_UNKNOWN_UNSET| \
53    PCRE2_SUBSTITUTE_UNSET_EMPTY)
54 
55 
56 
57 /*************************************************
58 *           Find end of substitute text          *
59 *************************************************/
60 
61 /* In extended mode, we recognize ${name:+set text:unset text} and similar
62 constructions. This requires the identification of unescaped : and }
63 characters. This function scans for such. It must deal with nested ${
64 constructions. The pointer to the text is updated, either to the required end
65 character, or to where an error was detected.
66 
67 Arguments:
68   code      points to the compiled expression (for options)
69   ptrptr    points to the pointer to the start of the text (updated)
70   ptrend    end of the whole string
71   last      TRUE if the last expected string (only } recognized)
72 
73 Returns:    0 on success
74             negative error code on failure
75 */
76 
77 static int
find_text_end(const pcre2_code * code,PCRE2_SPTR * ptrptr,PCRE2_SPTR ptrend,BOOL last)78 find_text_end(const pcre2_code *code, PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend,
79   BOOL last)
80 {
81 int rc = 0;
82 uint32_t nestlevel = 0;
83 BOOL literal = FALSE;
84 PCRE2_SPTR ptr = *ptrptr;
85 
86 for (; ptr < ptrend; ptr++)
87   {
88   if (literal)
89     {
90     if (ptr[0] == CHAR_BACKSLASH && ptr < ptrend - 1 && ptr[1] == CHAR_E)
91       {
92       literal = FALSE;
93       ptr += 1;
94       }
95     }
96 
97   else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
98     {
99     if (nestlevel == 0) goto EXIT;
100     nestlevel--;
101     }
102 
103   else if (*ptr == CHAR_COLON && !last && nestlevel == 0) goto EXIT;
104 
105   else if (*ptr == CHAR_DOLLAR_SIGN)
106     {
107     if (ptr < ptrend - 1 && ptr[1] == CHAR_LEFT_CURLY_BRACKET)
108       {
109       nestlevel++;
110       ptr += 1;
111       }
112     }
113 
114   else if (*ptr == CHAR_BACKSLASH)
115     {
116     int erc;
117     int errorcode;
118     uint32_t ch;
119 
120     if (ptr < ptrend - 1) switch (ptr[1])
121       {
122       case CHAR_L:
123       case CHAR_l:
124       case CHAR_U:
125       case CHAR_u:
126       ptr += 1;
127       continue;
128       }
129 
130     ptr += 1;  /* Must point after \ */
131     erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
132       code->overall_options, code->extra_options, FALSE, NULL);
133     ptr -= 1;  /* Back to last code unit of escape */
134     if (errorcode != 0)
135       {
136       rc = errorcode;
137       goto EXIT;
138       }
139 
140     switch(erc)
141       {
142       case 0:      /* Data character */
143       case ESC_E:  /* Isolated \E is ignored */
144       break;
145 
146       case ESC_Q:
147       literal = TRUE;
148       break;
149 
150       default:
151       rc = PCRE2_ERROR_BADREPESCAPE;
152       goto EXIT;
153       }
154     }
155   }
156 
157 rc = PCRE2_ERROR_REPMISSINGBRACE;   /* Terminator not found */
158 
159 EXIT:
160 *ptrptr = ptr;
161 return rc;
162 }
163 
164 
165 
166 /*************************************************
167 *              Match and substitute              *
168 *************************************************/
169 
170 /* This function applies a compiled re to a subject string and creates a new
171 string with substitutions. The first 7 arguments are the same as for
172 pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.
173 
174 Arguments:
175   code            points to the compiled expression
176   subject         points to the subject string
177   length          length of subject string (may contain binary zeros)
178   start_offset    where to start in the subject string
179   options         option bits
180   match_data      points to a match_data block, or is NULL
181   context         points a PCRE2 context
182   replacement     points to the replacement string
183   rlength         length of replacement string
184   buffer          where to put the substituted string
185   blength         points to length of buffer; updated to length of string
186 
187 Returns:          >= 0 number of substitutions made
188                   < 0 an error code
189                   PCRE2_ERROR_BADREPLACEMENT means invalid use of $
190 */
191 
/* This macro checks for space in the buffer before copying into it. On
overflow, either give an error immediately (by jumping to the NOROOM label), or
keep on, accumulating the length in extra_needed.

It operates on the following variables of the function in which it is used:
buffer, buff_offset, lengthleft, overflowed, extra_needed and suboptions. The
"length" argument is a count of code units and is evaluated exactly once. The
body is wrapped in do { } while (0) so that an invocation followed by a
semicolon is a single statement, safe to use in an unbraced if/else. */

#define CHECKMEMCPY(from,length) \
  do \
    { \
    PCRE2_SIZE checkmemcpy_len = (length); \
    if (!overflowed && lengthleft < checkmemcpy_len) \
      { \
      if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
      overflowed = TRUE; \
      extra_needed = checkmemcpy_len - lengthleft; \
      } \
    else if (overflowed) \
      { \
      extra_needed += checkmemcpy_len; \
      } \
    else \
      { \
      memcpy(buffer + buff_offset, (from), CU2BYTES(checkmemcpy_len)); \
      buff_offset += checkmemcpy_len; \
      lengthleft -= checkmemcpy_len; \
      } \
    } \
  while (0)
213 
214 /* Here's the function */
215 
216 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substitute(const pcre2_code * code,PCRE2_SPTR subject,PCRE2_SIZE length,PCRE2_SIZE start_offset,uint32_t options,pcre2_match_data * match_data,pcre2_match_context * mcontext,PCRE2_SPTR replacement,PCRE2_SIZE rlength,PCRE2_UCHAR * buffer,PCRE2_SIZE * blength)217 pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
218   PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
219   pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
220   PCRE2_UCHAR *buffer, PCRE2_SIZE *blength)
221 {
222 int rc;
223 int subs;
224 int forcecase = 0;
225 int forcecasereset = 0;
226 uint32_t ovector_count;
227 uint32_t goptions = 0;
228 uint32_t suboptions;
229 BOOL match_data_created = FALSE;
230 BOOL literal = FALSE;
231 BOOL overflowed = FALSE;
232 #ifdef SUPPORT_UNICODE
233 BOOL utf = (code->overall_options & PCRE2_UTF) != 0;
234 #endif
235 PCRE2_UCHAR temp[6];
236 PCRE2_SPTR ptr;
237 PCRE2_SPTR repend;
238 PCRE2_SIZE extra_needed = 0;
239 PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
240 PCRE2_SIZE *ovector;
241 PCRE2_SIZE ovecsave[3];
242 pcre2_substitute_callout_block scb;
243 
244 /* General initialization */
245 
246 buff_offset = 0;
247 lengthleft = buff_length = *blength;
248 *blength = PCRE2_UNSET;
249 ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
250 
251 /* Partial matching is not valid. This must come after setting *blength to
252 PCRE2_UNSET, so as not to imply an offset in the replacement. */
253 
254 if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
255   return PCRE2_ERROR_BADOPTION;
256 
257 /* If no match data block is provided, create one. */
258 
259 if (match_data == NULL)
260   {
261   pcre2_general_context *gcontext = (mcontext == NULL)?
262     (pcre2_general_context *)code :
263     (pcre2_general_context *)mcontext;
264   match_data = pcre2_match_data_create_from_pattern(code, gcontext);
265   if (match_data == NULL) return PCRE2_ERROR_NOMEMORY;
266   match_data_created = TRUE;
267   }
268 ovector = pcre2_get_ovector_pointer(match_data);
269 ovector_count = pcre2_get_ovector_count(match_data);
270 
271 /* Fixed things in the callout block */
272 
273 scb.version = 0;
274 scb.input = subject;
275 scb.output = (PCRE2_SPTR)buffer;
276 scb.ovector = ovector;
277 
278 /* Find lengths of zero-terminated strings and the end of the replacement. */
279 
280 if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
281 if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
282 repend = replacement + rlength;
283 
284 /* Check UTF replacement string if necessary. */
285 
286 #ifdef SUPPORT_UNICODE
287 if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
288   {
289   rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar));
290   if (rc != 0)
291     {
292     match_data->leftchar = 0;
293     goto EXIT;
294     }
295   }
296 #endif  /* SUPPORT_UNICODE */
297 
298 /* Save the substitute options and remove them from the match options. */
299 
300 suboptions = options & SUBSTITUTE_OPTIONS;
301 options &= ~SUBSTITUTE_OPTIONS;
302 
303 /* Copy up to the start offset */
304 
305 if (start_offset > length)
306   {
307   match_data->leftchar = 0;
308   rc = PCRE2_ERROR_BADOFFSET;
309   goto EXIT;
310   }
311 CHECKMEMCPY(subject, start_offset);
312 
313 /* Loop for global substituting. */
314 
315 subs = 0;
316 do
317   {
318   PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
319   uint32_t ptrstackptr = 0;
320 
321   rc = pcre2_match(code, subject, length, start_offset, options|goptions,
322     match_data, mcontext);
323 
324 #ifdef SUPPORT_UNICODE
325   if (utf) options |= PCRE2_NO_UTF_CHECK;  /* Only need to check once */
326 #endif
327 
328   /* Any error other than no match returns the error code. No match when not
329   doing the special after-empty-match global rematch, or when at the end of the
330   subject, breaks the global loop. Otherwise, advance the starting point by one
331   character, copying it to the output, and try again. */
332 
333   if (rc < 0)
334     {
335     PCRE2_SIZE save_start;
336 
337     if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
338     if (goptions == 0 || start_offset >= length) break;
339 
340     /* Advance by one code point. Then, if CRLF is a valid newline sequence and
341     we have advanced into the middle of it, advance one more code point. In
342     other words, do not start in the middle of CRLF, even if CR and LF on their
343     own are valid newlines. */
344 
345     save_start = start_offset++;
346     if (subject[start_offset-1] == CHAR_CR &&
347         code->newline_convention != PCRE2_NEWLINE_CR &&
348         code->newline_convention != PCRE2_NEWLINE_LF &&
349         start_offset < length &&
350         subject[start_offset] == CHAR_LF)
351       start_offset++;
352 
353     /* Otherwise, in UTF mode, advance past any secondary code points. */
354 
355     else if ((code->overall_options & PCRE2_UTF) != 0)
356       {
357 #if PCRE2_CODE_UNIT_WIDTH == 8
358       while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80)
359         start_offset++;
360 #elif PCRE2_CODE_UNIT_WIDTH == 16
361       while (start_offset < length &&
362             (subject[start_offset] & 0xfc00) == 0xdc00)
363         start_offset++;
364 #endif
365       }
366 
367     /* Copy what we have advanced past, reset the special global options, and
368     continue to the next match. */
369 
370     fraglength = start_offset - save_start;
371     CHECKMEMCPY(subject + save_start, fraglength);
372     goptions = 0;
373     continue;
374     }
375 
376   /* Handle a successful match. Matches that use \K to end before they start
377   or start before the current point in the subject are not supported. */
378 
379   if (ovector[1] < ovector[0] || ovector[0] < start_offset)
380     {
381     rc = PCRE2_ERROR_BADSUBSPATTERN;
382     goto EXIT;
383     }
384 
385   /* Check for the same match as previous. This is legitimate after matching an
386   empty string that starts after the initial match offset. We have tried again
387   at the match point in case the pattern is one like /(?<=\G.)/ which can never
388   match at its starting point, so running the match achieves the bumpalong. If
389   we do get the same (null) match at the original match point, it isn't such a
390   pattern, so we now do the empty string magic. In all other cases, a repeat
391   match should never occur. */
392 
393   if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
394     {
395     if (ovector[0] == ovector[1] && ovecsave[2] != start_offset)
396       {
397       goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
398       ovecsave[2] = start_offset;
399       continue;    /* Back to the top of the loop */
400       }
401     rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
402     goto EXIT;
403     }
404 
405   /* Count substitutions with a paranoid check for integer overflow; surely no
406   real call to this function would ever hit this! */
407 
408   if (subs == INT_MAX)
409     {
410     rc = PCRE2_ERROR_TOOMANYREPLACE;
411     goto EXIT;
412     }
413   subs++;
414 
415   /* Copy the text leading up to the match, and remember where the insert
416   begins and how many ovector pairs are set. */
417 
418   if (rc == 0) rc = ovector_count;
419   fraglength = ovector[0] - start_offset;
420   CHECKMEMCPY(subject + start_offset, fraglength);
421   scb.output_offsets[0] = buff_offset;
422   scb.oveccount = rc;
423 
424   /* Process the replacement string. Literal mode is set by \Q, but only in
425   extended mode when backslashes are being interpreted. In extended mode we
426   must handle nested substrings that are to be reprocessed. */
427 
428   ptr = replacement;
429   for (;;)
430     {
431     uint32_t ch;
432     unsigned int chlen;
433 
434     /* If at the end of a nested substring, pop the stack. */
435 
436     if (ptr >= repend)
437       {
438       if (ptrstackptr == 0) break;       /* End of replacement string */
439       repend = ptrstack[--ptrstackptr];
440       ptr = ptrstack[--ptrstackptr];
441       continue;
442       }
443 
444     /* Handle the next character */
445 
446     if (literal)
447       {
448       if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
449         {
450         literal = FALSE;
451         ptr += 2;
452         continue;
453         }
454       goto LOADLITERAL;
455       }
456 
457     /* Not in literal mode. */
458 
459     if (*ptr == CHAR_DOLLAR_SIGN)
460       {
461       int group, n;
462       uint32_t special = 0;
463       BOOL inparens;
464       BOOL star;
465       PCRE2_SIZE sublength;
466       PCRE2_SPTR text1_start = NULL;
467       PCRE2_SPTR text1_end = NULL;
468       PCRE2_SPTR text2_start = NULL;
469       PCRE2_SPTR text2_end = NULL;
470       PCRE2_UCHAR next;
471       PCRE2_UCHAR name[33];
472 
473       if (++ptr >= repend) goto BAD;
474       if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL;
475 
476       group = -1;
477       n = 0;
478       inparens = FALSE;
479       star = FALSE;
480 
481       if (next == CHAR_LEFT_CURLY_BRACKET)
482         {
483         if (++ptr >= repend) goto BAD;
484         next = *ptr;
485         inparens = TRUE;
486         }
487 
488       if (next == CHAR_ASTERISK)
489         {
490         if (++ptr >= repend) goto BAD;
491         next = *ptr;
492         star = TRUE;
493         }
494 
495       if (!star && next >= CHAR_0 && next <= CHAR_9)
496         {
497         group = next - CHAR_0;
498         while (++ptr < repend)
499           {
500           next = *ptr;
501           if (next < CHAR_0 || next > CHAR_9) break;
502           group = group * 10 + next - CHAR_0;
503 
504           /* A check for a number greater than the hightest captured group
505           is sufficient here; no need for a separate overflow check. If unknown
506           groups are to be treated as unset, just skip over any remaining
507           digits and carry on. */
508 
509           if (group > code->top_bracket)
510             {
511             if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
512               {
513               while (++ptr < repend && *ptr >= CHAR_0 && *ptr <= CHAR_9);
514               break;
515               }
516             else
517               {
518               rc = PCRE2_ERROR_NOSUBSTRING;
519               goto PTREXIT;
520               }
521             }
522           }
523         }
524       else
525         {
526         const uint8_t *ctypes = code->tables + ctypes_offset;
527         while (MAX_255(next) && (ctypes[next] & ctype_word) != 0)
528           {
529           name[n++] = next;
530           if (n > 32) goto BAD;
531           if (++ptr >= repend) break;
532           next = *ptr;
533           }
534         if (n == 0) goto BAD;
535         name[n] = 0;
536         }
537 
538       /* In extended mode we recognize ${name:+set text:unset text} and
539       ${name:-default text}. */
540 
541       if (inparens)
542         {
543         if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
544              !star && ptr < repend - 2 && next == CHAR_COLON)
545           {
546           special = *(++ptr);
547           if (special != CHAR_PLUS && special != CHAR_MINUS)
548             {
549             rc = PCRE2_ERROR_BADSUBSTITUTION;
550             goto PTREXIT;
551             }
552 
553           text1_start = ++ptr;
554           rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS);
555           if (rc != 0) goto PTREXIT;
556           text1_end = ptr;
557 
558           if (special == CHAR_PLUS && *ptr == CHAR_COLON)
559             {
560             text2_start = ++ptr;
561             rc = find_text_end(code, &ptr, repend, TRUE);
562             if (rc != 0) goto PTREXIT;
563             text2_end = ptr;
564             }
565           }
566 
567         else
568           {
569           if (ptr >= repend || *ptr != CHAR_RIGHT_CURLY_BRACKET)
570             {
571             rc = PCRE2_ERROR_REPMISSINGBRACE;
572             goto PTREXIT;
573             }
574           }
575 
576         ptr++;
577         }
578 
579       /* Have found a syntactically correct group number or name, or *name.
580       Only *MARK is currently recognized. */
581 
582       if (star)
583         {
584         if (PRIV(strcmp_c8)(name, STRING_MARK) == 0)
585           {
586           PCRE2_SPTR mark = pcre2_get_mark(match_data);
587           if (mark != NULL)
588             {
589             PCRE2_SPTR mark_start = mark;
590             while (*mark != 0) mark++;
591             fraglength = mark - mark_start;
592             CHECKMEMCPY(mark_start, fraglength);
593             }
594           }
595         else goto BAD;
596         }
597 
598       /* Substitute the contents of a group. We don't use substring_copy
599       functions any more, in order to support case forcing. */
600 
601       else
602         {
603         PCRE2_SPTR subptr, subptrend;
604 
605         /* Find a number for a named group. In case there are duplicate names,
606         search for the first one that is set. If the name is not found when
607         PCRE2_SUBSTITUTE_UNKNOWN_EMPTY is set, set the group number to a
608         non-existent group. */
609 
610         if (group < 0)
611           {
612           PCRE2_SPTR first, last, entry;
613           rc = pcre2_substring_nametable_scan(code, name, &first, &last);
614           if (rc == PCRE2_ERROR_NOSUBSTRING &&
615               (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
616             {
617             group = code->top_bracket + 1;
618             }
619           else
620             {
621             if (rc < 0) goto PTREXIT;
622             for (entry = first; entry <= last; entry += rc)
623               {
624               uint32_t ng = GET2(entry, 0);
625               if (ng < ovector_count)
626                 {
627                 if (group < 0) group = ng;          /* First in ovector */
628                 if (ovector[ng*2] != PCRE2_UNSET)
629                   {
630                   group = ng;                       /* First that is set */
631                   break;
632                   }
633                 }
634               }
635 
636             /* If group is still negative, it means we did not find a group
637             that is in the ovector. Just set the first group. */
638 
639             if (group < 0) group = GET2(first, 0);
640             }
641           }
642 
643         /* We now have a group that is identified by number. Find the length of
644         the captured string. If a group in a non-special substitution is unset
645         when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. */
646 
647         rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
648         if (rc < 0)
649           {
650           if (rc == PCRE2_ERROR_NOSUBSTRING &&
651               (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
652             {
653             rc = PCRE2_ERROR_UNSET;
654             }
655           if (rc != PCRE2_ERROR_UNSET) goto PTREXIT;  /* Non-unset errors */
656           if (special == 0)                           /* Plain substitution */
657             {
658             if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != 0) continue;
659             goto PTREXIT;                             /* Else error */
660             }
661           }
662 
663         /* If special is '+' we have a 'set' and possibly an 'unset' text,
664         both of which are reprocessed when used. If special is '-' we have a
665         default text for when the group is unset; it must be reprocessed. */
666 
667         if (special != 0)
668           {
669           if (special == CHAR_MINUS)
670             {
671             if (rc == 0) goto LITERAL_SUBSTITUTE;
672             text2_start = text1_start;
673             text2_end = text1_end;
674             }
675 
676           if (ptrstackptr >= PTR_STACK_SIZE) goto BAD;
677           ptrstack[ptrstackptr++] = ptr;
678           ptrstack[ptrstackptr++] = repend;
679 
680           if (rc == 0)
681             {
682             ptr = text1_start;
683             repend = text1_end;
684             }
685           else
686             {
687             ptr = text2_start;
688             repend = text2_end;
689             }
690           continue;
691           }
692 
693         /* Otherwise we have a literal substitution of a group's contents. */
694 
695         LITERAL_SUBSTITUTE:
696         subptr = subject + ovector[group*2];
697         subptrend = subject + ovector[group*2 + 1];
698 
699         /* Substitute a literal string, possibly forcing alphabetic case. */
700 
701         while (subptr < subptrend)
702           {
703           GETCHARINCTEST(ch, subptr);
704           if (forcecase != 0)
705             {
706 #ifdef SUPPORT_UNICODE
707             if (utf)
708               {
709               uint32_t type = UCD_CHARTYPE(ch);
710               if (PRIV(ucp_gentype)[type] == ucp_L &&
711                   type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
712                 ch = UCD_OTHERCASE(ch);
713               }
714             else
715 #endif
716               {
717               if (((code->tables + cbits_offset +
718                   ((forcecase > 0)? cbit_upper:cbit_lower)
719                   )[ch/8] & (1u << (ch%8))) == 0)
720                 ch = (code->tables + fcc_offset)[ch];
721               }
722             forcecase = forcecasereset;
723             }
724 
725 #ifdef SUPPORT_UNICODE
726           if (utf) chlen = PRIV(ord2utf)(ch, temp); else
727 #endif
728             {
729             temp[0] = ch;
730             chlen = 1;
731             }
732           CHECKMEMCPY(temp, chlen);
733           }
734         }
735       }
736 
737     /* Handle an escape sequence in extended mode. We can use check_escape()
738     to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but
739     the case-forcing escapes are not supported in pcre2_compile() so must be
740     recognized here. */
741 
742     else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
743               *ptr == CHAR_BACKSLASH)
744       {
745       int errorcode;
746 
747       if (ptr < repend - 1) switch (ptr[1])
748         {
749         case CHAR_L:
750         forcecase = forcecasereset = -1;
751         ptr += 2;
752         continue;
753 
754         case CHAR_l:
755         forcecase = -1;
756         forcecasereset = 0;
757         ptr += 2;
758         continue;
759 
760         case CHAR_U:
761         forcecase = forcecasereset = 1;
762         ptr += 2;
763         continue;
764 
765         case CHAR_u:
766         forcecase = 1;
767         forcecasereset = 0;
768         ptr += 2;
769         continue;
770 
771         default:
772         break;
773         }
774 
775       ptr++;  /* Point after \ */
776       rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
777         code->overall_options, code->extra_options, FALSE, NULL);
778       if (errorcode != 0) goto BADESCAPE;
779 
780       switch(rc)
781         {
782         case ESC_E:
783         forcecase = forcecasereset = 0;
784         continue;
785 
786         case ESC_Q:
787         literal = TRUE;
788         continue;
789 
790         case 0:      /* Data character */
791         goto LITERAL;
792 
793         default:
794         goto BADESCAPE;
795         }
796       }
797 
798     /* Handle a literal code unit */
799 
800     else
801       {
802       LOADLITERAL:
803       GETCHARINCTEST(ch, ptr);    /* Get character value, increment pointer */
804 
805       LITERAL:
806       if (forcecase != 0)
807         {
808 #ifdef SUPPORT_UNICODE
809         if (utf)
810           {
811           uint32_t type = UCD_CHARTYPE(ch);
812           if (PRIV(ucp_gentype)[type] == ucp_L &&
813               type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
814             ch = UCD_OTHERCASE(ch);
815           }
816         else
817 #endif
818           {
819           if (((code->tables + cbits_offset +
820               ((forcecase > 0)? cbit_upper:cbit_lower)
821               )[ch/8] & (1u << (ch%8))) == 0)
822             ch = (code->tables + fcc_offset)[ch];
823           }
824         forcecase = forcecasereset;
825         }
826 
827 #ifdef SUPPORT_UNICODE
828       if (utf) chlen = PRIV(ord2utf)(ch, temp); else
829 #endif
830         {
831         temp[0] = ch;
832         chlen = 1;
833         }
834       CHECKMEMCPY(temp, chlen);
835       } /* End handling a literal code unit */
836     }   /* End of loop for scanning the replacement. */
837 
838   /* The replacement has been copied to the output, or its size has been
839   remembered. Do the callout if there is one and we have done an actual
840   replacement. */
841 
842   if (!overflowed && mcontext != NULL && mcontext->substitute_callout != NULL)
843     {
844     scb.subscount = subs;
845     scb.output_offsets[1] = buff_offset;
846     rc = mcontext->substitute_callout(&scb, mcontext->substitute_callout_data);
847 
848     /* A non-zero return means cancel this substitution. Instead, copy the
849     matched string fragment. */
850 
851     if (rc != 0)
852       {
853       PCRE2_SIZE newlength = scb.output_offsets[1] - scb.output_offsets[0];
854       PCRE2_SIZE oldlength = ovector[1] - ovector[0];
855 
856       buff_offset -= newlength;
857       lengthleft += newlength;
858       CHECKMEMCPY(subject + ovector[0], oldlength);
859 
860       /* A negative return means do not do any more. */
861 
862       if (rc < 0) suboptions &= (~PCRE2_SUBSTITUTE_GLOBAL);
863       }
864     }
865 
866   /* Save the details of this match. See above for how this data is used. If we
867   matched an empty string, do the magic for global matches. Finally, update the
868   start offset to point to the rest of the subject string. */
869 
870   ovecsave[0] = ovector[0];
871   ovecsave[1] = ovector[1];
872   ovecsave[2] = start_offset;
873 
874   goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 :
875     PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
876   start_offset = ovector[1];
877   } while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0);  /* Repeat "do" loop */
878 
879 /* Copy the rest of the subject. */
880 
881 fraglength = length - start_offset;
882 CHECKMEMCPY(subject + start_offset, fraglength);
883 temp[0] = 0;
884 CHECKMEMCPY(temp , 1);
885 
886 /* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,
887 and matching has carried on after a full buffer, in order to compute the length
888 needed. Otherwise, an overflow generates an immediate error return. */
889 
890 if (overflowed)
891   {
892   rc = PCRE2_ERROR_NOMEMORY;
893   *blength = buff_length + extra_needed;
894   }
895 
896 /* After a successful execution, return the number of substitutions and set the
897 length of buffer used, excluding the trailing zero. */
898 
899 else
900   {
901   rc = subs;
902   *blength = buff_offset - 1;
903   }
904 
905 EXIT:
906 if (match_data_created) pcre2_match_data_free(match_data);
907   else match_data->rc = rc;
908 return rc;
909 
910 NOROOM:
911 rc = PCRE2_ERROR_NOMEMORY;
912 goto EXIT;
913 
914 BAD:
915 rc = PCRE2_ERROR_BADREPLACEMENT;
916 goto PTREXIT;
917 
918 BADESCAPE:
919 rc = PCRE2_ERROR_BADREPESCAPE;
920 
921 PTREXIT:
922 *blength = (PCRE2_SIZE)(ptr - replacement);
923 goto EXIT;
924 }
925 
926 /* End of pcre2_substitute.c */
927