/* pcre2_substitute.c (source-browser navigation links removed from listing) */
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10           New API code Copyright (c) 2016-2018 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45 
46 #include "pcre2_internal.h"
47 
/* Number of slots in the stack that pcre2_substitute() uses while it
reprocesses nested ${name:+...} / ${name:-...} texts. Two pointers (the resume
position and the saved end of text) are pushed per nesting level, so this value
must be even. */

#define PTR_STACK_SIZE 20

/* The option bits that belong to pcre2_substitute() itself. They are split off
from the caller's options before the remainder is passed to pcre2_match(). */

#define SUBSTITUTE_OPTIONS \
  (PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \
   PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_UNKNOWN_UNSET| \
   PCRE2_SUBSTITUTE_UNSET_EMPTY)
54 
55 
56 
57 /*************************************************
58 *           Find end of substitute text          *
59 *************************************************/
60 
61 /* In extended mode, we recognize ${name:+set text:unset text} and similar
62 constructions. This requires the identification of unescaped : and }
63 characters. This function scans for such. It must deal with nested ${
64 constructions. The pointer to the text is updated, either to the required end
65 character, or to where an error was detected.
66 
67 Arguments:
68   code      points to the compiled expression (for options)
69   ptrptr    points to the pointer to the start of the text (updated)
70   ptrend    end of the whole string
71   last      TRUE if the last expected string (only } recognized)
72 
73 Returns:    0 on success
74             negative error code on failure
75 */
76 
77 static int
find_text_end(const pcre2_code * code,PCRE2_SPTR * ptrptr,PCRE2_SPTR ptrend,BOOL last)78 find_text_end(const pcre2_code *code, PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend,
79   BOOL last)
80 {
81 int rc = 0;
82 uint32_t nestlevel = 0;
83 BOOL literal = FALSE;
84 PCRE2_SPTR ptr = *ptrptr;
85 
86 for (; ptr < ptrend; ptr++)
87   {
88   if (literal)
89     {
90     if (ptr[0] == CHAR_BACKSLASH && ptr < ptrend - 1 && ptr[1] == CHAR_E)
91       {
92       literal = FALSE;
93       ptr += 1;
94       }
95     }
96 
97   else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
98     {
99     if (nestlevel == 0) goto EXIT;
100     nestlevel--;
101     }
102 
103   else if (*ptr == CHAR_COLON && !last && nestlevel == 0) goto EXIT;
104 
105   else if (*ptr == CHAR_DOLLAR_SIGN)
106     {
107     if (ptr < ptrend - 1 && ptr[1] == CHAR_LEFT_CURLY_BRACKET)
108       {
109       nestlevel++;
110       ptr += 1;
111       }
112     }
113 
114   else if (*ptr == CHAR_BACKSLASH)
115     {
116     int erc;
117     int errorcode;
118     uint32_t ch;
119 
120     if (ptr < ptrend - 1) switch (ptr[1])
121       {
122       case CHAR_L:
123       case CHAR_l:
124       case CHAR_U:
125       case CHAR_u:
126       ptr += 1;
127       continue;
128       }
129 
130     ptr += 1;  /* Must point after \ */
131     erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
132       code->overall_options, FALSE, NULL);
133     ptr -= 1;  /* Back to last code unit of escape */
134     if (errorcode != 0)
135       {
136       rc = errorcode;
137       goto EXIT;
138       }
139 
140     switch(erc)
141       {
142       case 0:      /* Data character */
143       case ESC_E:  /* Isolated \E is ignored */
144       break;
145 
146       case ESC_Q:
147       literal = TRUE;
148       break;
149 
150       default:
151       rc = PCRE2_ERROR_BADREPESCAPE;
152       goto EXIT;
153       }
154     }
155   }
156 
157 rc = PCRE2_ERROR_REPMISSINGBRACE;   /* Terminator not found */
158 
159 EXIT:
160 *ptrptr = ptr;
161 return rc;
162 }
163 
164 
165 
166 /*************************************************
167 *              Match and substitute              *
168 *************************************************/
169 
170 /* This function applies a compiled re to a subject string and creates a new
171 string with substitutions. The first 7 arguments are the same as for
172 pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.
173 
174 Arguments:
175   code            points to the compiled expression
176   subject         points to the subject string
177   length          length of subject string (may contain binary zeros)
178   start_offset    where to start in the subject string
179   options         option bits
180   match_data      points to a match_data block, or is NULL
181   context         points to a PCRE2 match context, or is NULL
182   replacement     points to the replacement string
183   rlength         length of replacement string
184   buffer          where to put the substituted string
185   blength         points to length of buffer; updated to length of string
186 
187 Returns:          >= 0 number of substitutions made
188                   < 0 an error code
189                   PCRE2_ERROR_BADREPLACEMENT means invalid use of $
190 */
191 
/* This macro checks for space in the buffer before copying into it. On
overflow, either give an error immediately (jump to NOROOM), or, when
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set, keep on, accumulating in extra_needed
the number of code units that did not fit. Once overflowed is set nothing more
is copied.

The macro relies on these names being in scope at the point of use: buffer,
buff_offset, lengthleft, extra_needed, overflowed, suboptions, and the label
NOROOM. It is wrapped in do { } while (0) so that it behaves as one statement
(for example as the unbraced body of an if with an else), and "length" is
evaluated exactly once. */

#define CHECKMEMCPY(from,length) \
  do \
    { \
    PCRE2_SIZE checkmemcpy_len_ = (length); \
    if (overflowed) \
      { \
      extra_needed += checkmemcpy_len_; \
      } \
    else if (lengthleft < checkmemcpy_len_) \
      { \
      if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
      overflowed = TRUE; \
      extra_needed = checkmemcpy_len_ - lengthleft; \
      } \
    else \
      { \
      memcpy(buffer + buff_offset, (from), CU2BYTES(checkmemcpy_len_)); \
      buff_offset += checkmemcpy_len_; \
      lengthleft -= checkmemcpy_len_; \
      } \
    } \
  while (0)
213 
214 /* Here's the function */
215 
216 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substitute(const pcre2_code * code,PCRE2_SPTR subject,PCRE2_SIZE length,PCRE2_SIZE start_offset,uint32_t options,pcre2_match_data * match_data,pcre2_match_context * mcontext,PCRE2_SPTR replacement,PCRE2_SIZE rlength,PCRE2_UCHAR * buffer,PCRE2_SIZE * blength)217 pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
218   PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
219   pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
220   PCRE2_UCHAR *buffer, PCRE2_SIZE *blength)
221 {
222 int rc;
223 int subs;
224 int forcecase = 0;
225 int forcecasereset = 0;
226 uint32_t ovector_count;
227 uint32_t goptions = 0;
228 uint32_t suboptions;
229 BOOL match_data_created = FALSE;
230 BOOL literal = FALSE;
231 BOOL overflowed = FALSE;
232 #ifdef SUPPORT_UNICODE
233 BOOL utf = (code->overall_options & PCRE2_UTF) != 0;
234 #endif
235 PCRE2_UCHAR temp[6];
236 PCRE2_SPTR ptr;
237 PCRE2_SPTR repend;
238 PCRE2_SIZE extra_needed = 0;
239 PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
240 PCRE2_SIZE *ovector;
241 PCRE2_SIZE ovecsave[3];
242 
243 buff_offset = 0;
244 lengthleft = buff_length = *blength;
245 *blength = PCRE2_UNSET;
246 ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
247 
248 /* Partial matching is not valid. */
249 
250 if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
251   return PCRE2_ERROR_BADOPTION;
252 
253 /* If no match data block is provided, create one. */
254 
255 if (match_data == NULL)
256   {
257   pcre2_general_context *gcontext = (mcontext == NULL)?
258     (pcre2_general_context *)code :
259     (pcre2_general_context *)mcontext;
260   match_data = pcre2_match_data_create_from_pattern(code, gcontext);
261   if (match_data == NULL) return PCRE2_ERROR_NOMEMORY;
262   match_data_created = TRUE;
263   }
264 ovector = pcre2_get_ovector_pointer(match_data);
265 ovector_count = pcre2_get_ovector_count(match_data);
266 
267 /* Find lengths of zero-terminated strings and the end of the replacement. */
268 
269 if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
270 if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
271 repend = replacement + rlength;
272 
273 /* Check UTF replacement string if necessary. */
274 
275 #ifdef SUPPORT_UNICODE
276 if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
277   {
278   rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar));
279   if (rc != 0)
280     {
281     match_data->leftchar = 0;
282     goto EXIT;
283     }
284   }
285 #endif  /* SUPPORT_UNICODE */
286 
287 /* Save the substitute options and remove them from the match options. */
288 
289 suboptions = options & SUBSTITUTE_OPTIONS;
290 options &= ~SUBSTITUTE_OPTIONS;
291 
292 /* Copy up to the start offset */
293 
294 if (start_offset > length)
295   {
296   match_data->leftchar = 0;
297   rc = PCRE2_ERROR_BADOFFSET;
298   goto EXIT;
299   }
300 CHECKMEMCPY(subject, start_offset);
301 
302 /* Loop for global substituting. */
303 
304 subs = 0;
305 do
306   {
307   PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
308   uint32_t ptrstackptr = 0;
309 
310   rc = pcre2_match(code, subject, length, start_offset, options|goptions,
311     match_data, mcontext);
312 
313 #ifdef SUPPORT_UNICODE
314   if (utf) options |= PCRE2_NO_UTF_CHECK;  /* Only need to check once */
315 #endif
316 
317   /* Any error other than no match returns the error code. No match when not
318   doing the special after-empty-match global rematch, or when at the end of the
319   subject, breaks the global loop. Otherwise, advance the starting point by one
320   character, copying it to the output, and try again. */
321 
322   if (rc < 0)
323     {
324     PCRE2_SIZE save_start;
325 
326     if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
327     if (goptions == 0 || start_offset >= length) break;
328 
329     /* Advance by one code point. Then, if CRLF is a valid newline sequence and
330     we have advanced into the middle of it, advance one more code point. In
331     other words, do not start in the middle of CRLF, even if CR and LF on their
332     own are valid newlines. */
333 
334     save_start = start_offset++;
335     if (subject[start_offset-1] == CHAR_CR &&
336         code->newline_convention != PCRE2_NEWLINE_CR &&
337         code->newline_convention != PCRE2_NEWLINE_LF &&
338         start_offset < length &&
339         subject[start_offset] == CHAR_LF)
340       start_offset++;
341 
342     /* Otherwise, in UTF mode, advance past any secondary code points. */
343 
344     else if ((code->overall_options & PCRE2_UTF) != 0)
345       {
346 #if PCRE2_CODE_UNIT_WIDTH == 8
347       while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80)
348         start_offset++;
349 #elif PCRE2_CODE_UNIT_WIDTH == 16
350       while (start_offset < length &&
351             (subject[start_offset] & 0xfc00) == 0xdc00)
352         start_offset++;
353 #endif
354       }
355 
356     /* Copy what we have advanced past, reset the special global options, and
357     continue to the next match. */
358 
359     fraglength = start_offset - save_start;
360     CHECKMEMCPY(subject + save_start, fraglength);
361     goptions = 0;
362     continue;
363     }
364 
365   /* Handle a successful match. Matches that use \K to end before they start
366   or start before the current point in the subject are not supported. */
367 
368   if (ovector[1] < ovector[0] || ovector[0] < start_offset)
369     {
370     rc = PCRE2_ERROR_BADSUBSPATTERN;
371     goto EXIT;
372     }
373 
374   /* Check for the same match as previous. This is legitimate after matching an
375   empty string that starts after the initial match offset. We have tried again
376   at the match point in case the pattern is one like /(?<=\G.)/ which can never
377   match at its starting point, so running the match achieves the bumpalong. If
378   we do get the same (null) match at the original match point, it isn't such a
379   pattern, so we now do the empty string magic. In all other cases, a repeat
380   match should never occur. */
381 
382   if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
383     {
384     if (ovector[0] == ovector[1] && ovecsave[2] != start_offset)
385       {
386       goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
387       ovecsave[2] = start_offset;
388       continue;    /* Back to the top of the loop */
389       }
390     rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
391     goto EXIT;
392     }
393 
394   /* Count substitutions with a paranoid check for integer overflow; surely no
395   real call to this function would ever hit this! */
396 
397   if (subs == INT_MAX)
398     {
399     rc = PCRE2_ERROR_TOOMANYREPLACE;
400     goto EXIT;
401     }
402   subs++;
403 
404   /* Copy the text leading up to the match. */
405 
406   if (rc == 0) rc = ovector_count;
407   fraglength = ovector[0] - start_offset;
408   CHECKMEMCPY(subject + start_offset, fraglength);
409 
410   /* Process the replacement string. Literal mode is set by \Q, but only in
411   extended mode when backslashes are being interpreted. In extended mode we
412   must handle nested substrings that are to be reprocessed. */
413 
414   ptr = replacement;
415   for (;;)
416     {
417     uint32_t ch;
418     unsigned int chlen;
419 
420     /* If at the end of a nested substring, pop the stack. */
421 
422     if (ptr >= repend)
423       {
424       if (ptrstackptr <= 0) break;       /* End of replacement string */
425       repend = ptrstack[--ptrstackptr];
426       ptr = ptrstack[--ptrstackptr];
427       continue;
428       }
429 
430     /* Handle the next character */
431 
432     if (literal)
433       {
434       if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
435         {
436         literal = FALSE;
437         ptr += 2;
438         continue;
439         }
440       goto LOADLITERAL;
441       }
442 
443     /* Not in literal mode. */
444 
445     if (*ptr == CHAR_DOLLAR_SIGN)
446       {
447       int group, n;
448       uint32_t special = 0;
449       BOOL inparens;
450       BOOL star;
451       PCRE2_SIZE sublength;
452       PCRE2_SPTR text1_start = NULL;
453       PCRE2_SPTR text1_end = NULL;
454       PCRE2_SPTR text2_start = NULL;
455       PCRE2_SPTR text2_end = NULL;
456       PCRE2_UCHAR next;
457       PCRE2_UCHAR name[33];
458 
459       if (++ptr >= repend) goto BAD;
460       if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL;
461 
462       group = -1;
463       n = 0;
464       inparens = FALSE;
465       star = FALSE;
466 
467       if (next == CHAR_LEFT_CURLY_BRACKET)
468         {
469         if (++ptr >= repend) goto BAD;
470         next = *ptr;
471         inparens = TRUE;
472         }
473 
474       if (next == CHAR_ASTERISK)
475         {
476         if (++ptr >= repend) goto BAD;
477         next = *ptr;
478         star = TRUE;
479         }
480 
481       if (!star && next >= CHAR_0 && next <= CHAR_9)
482         {
483         group = next - CHAR_0;
484         while (++ptr < repend)
485           {
486           next = *ptr;
487           if (next < CHAR_0 || next > CHAR_9) break;
488           group = group * 10 + next - CHAR_0;
489 
490           /* A check for a number greater than the hightest captured group
491           is sufficient here; no need for a separate overflow check. If unknown
492           groups are to be treated as unset, just skip over any remaining
493           digits and carry on. */
494 
495           if (group > code->top_bracket)
496             {
497             if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
498               {
499               while (++ptr < repend && *ptr >= CHAR_0 && *ptr <= CHAR_9);
500               break;
501               }
502             else
503               {
504               rc = PCRE2_ERROR_NOSUBSTRING;
505               goto PTREXIT;
506               }
507             }
508           }
509         }
510       else
511         {
512         const uint8_t *ctypes = code->tables + ctypes_offset;
513         while (MAX_255(next) && (ctypes[next] & ctype_word) != 0)
514           {
515           name[n++] = next;
516           if (n > 32) goto BAD;
517           if (++ptr >= repend) break;
518           next = *ptr;
519           }
520         if (n == 0) goto BAD;
521         name[n] = 0;
522         }
523 
524       /* In extended mode we recognize ${name:+set text:unset text} and
525       ${name:-default text}. */
526 
527       if (inparens)
528         {
529         if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
530              !star && ptr < repend - 2 && next == CHAR_COLON)
531           {
532           special = *(++ptr);
533           if (special != CHAR_PLUS && special != CHAR_MINUS)
534             {
535             rc = PCRE2_ERROR_BADSUBSTITUTION;
536             goto PTREXIT;
537             }
538 
539           text1_start = ++ptr;
540           rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS);
541           if (rc != 0) goto PTREXIT;
542           text1_end = ptr;
543 
544           if (special == CHAR_PLUS && *ptr == CHAR_COLON)
545             {
546             text2_start = ++ptr;
547             rc = find_text_end(code, &ptr, repend, TRUE);
548             if (rc != 0) goto PTREXIT;
549             text2_end = ptr;
550             }
551           }
552 
553         else
554           {
555           if (ptr >= repend || *ptr != CHAR_RIGHT_CURLY_BRACKET)
556             {
557             rc = PCRE2_ERROR_REPMISSINGBRACE;
558             goto PTREXIT;
559             }
560           }
561 
562         ptr++;
563         }
564 
565       /* Have found a syntactically correct group number or name, or *name.
566       Only *MARK is currently recognized. */
567 
568       if (star)
569         {
570         if (PRIV(strcmp_c8)(name, STRING_MARK) == 0)
571           {
572           PCRE2_SPTR mark = pcre2_get_mark(match_data);
573           if (mark != NULL)
574             {
575             PCRE2_SPTR mark_start = mark;
576             while (*mark != 0) mark++;
577             fraglength = mark - mark_start;
578             CHECKMEMCPY(mark_start, fraglength);
579             }
580           }
581         else goto BAD;
582         }
583 
584       /* Substitute the contents of a group. We don't use substring_copy
585       functions any more, in order to support case forcing. */
586 
587       else
588         {
589         PCRE2_SPTR subptr, subptrend;
590 
591         /* Find a number for a named group. In case there are duplicate names,
592         search for the first one that is set. If the name is not found when
593         PCRE2_SUBSTITUTE_UNKNOWN_EMPTY is set, set the group number to a
594         non-existent group. */
595 
596         if (group < 0)
597           {
598           PCRE2_SPTR first, last, entry;
599           rc = pcre2_substring_nametable_scan(code, name, &first, &last);
600           if (rc == PCRE2_ERROR_NOSUBSTRING &&
601               (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
602             {
603             group = code->top_bracket + 1;
604             }
605           else
606             {
607             if (rc < 0) goto PTREXIT;
608             for (entry = first; entry <= last; entry += rc)
609               {
610               uint32_t ng = GET2(entry, 0);
611               if (ng < ovector_count)
612                 {
613                 if (group < 0) group = ng;          /* First in ovector */
614                 if (ovector[ng*2] != PCRE2_UNSET)
615                   {
616                   group = ng;                       /* First that is set */
617                   break;
618                   }
619                 }
620               }
621 
622             /* If group is still negative, it means we did not find a group
623             that is in the ovector. Just set the first group. */
624 
625             if (group < 0) group = GET2(first, 0);
626             }
627           }
628 
629         /* We now have a group that is identified by number. Find the length of
630         the captured string. If a group in a non-special substitution is unset
631         when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. */
632 
633         rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
634         if (rc < 0)
635           {
636           if (rc == PCRE2_ERROR_NOSUBSTRING &&
637               (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
638             {
639             rc = PCRE2_ERROR_UNSET;
640             }
641           if (rc != PCRE2_ERROR_UNSET) goto PTREXIT;  /* Non-unset errors */
642           if (special == 0)                           /* Plain substitution */
643             {
644             if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != 0) continue;
645             goto PTREXIT;                             /* Else error */
646             }
647           }
648 
649         /* If special is '+' we have a 'set' and possibly an 'unset' text,
650         both of which are reprocessed when used. If special is '-' we have a
651         default text for when the group is unset; it must be reprocessed. */
652 
653         if (special != 0)
654           {
655           if (special == CHAR_MINUS)
656             {
657             if (rc == 0) goto LITERAL_SUBSTITUTE;
658             text2_start = text1_start;
659             text2_end = text1_end;
660             }
661 
662           if (ptrstackptr >= PTR_STACK_SIZE) goto BAD;
663           ptrstack[ptrstackptr++] = ptr;
664           ptrstack[ptrstackptr++] = repend;
665 
666           if (rc == 0)
667             {
668             ptr = text1_start;
669             repend = text1_end;
670             }
671           else
672             {
673             ptr = text2_start;
674             repend = text2_end;
675             }
676           continue;
677           }
678 
679         /* Otherwise we have a literal substitution of a group's contents. */
680 
681         LITERAL_SUBSTITUTE:
682         subptr = subject + ovector[group*2];
683         subptrend = subject + ovector[group*2 + 1];
684 
685         /* Substitute a literal string, possibly forcing alphabetic case. */
686 
687         while (subptr < subptrend)
688           {
689           GETCHARINCTEST(ch, subptr);
690           if (forcecase != 0)
691             {
692 #ifdef SUPPORT_UNICODE
693             if (utf)
694               {
695               uint32_t type = UCD_CHARTYPE(ch);
696               if (PRIV(ucp_gentype)[type] == ucp_L &&
697                   type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
698                 ch = UCD_OTHERCASE(ch);
699               }
700             else
701 #endif
702               {
703               if (((code->tables + cbits_offset +
704                   ((forcecase > 0)? cbit_upper:cbit_lower)
705                   )[ch/8] & (1 << (ch%8))) == 0)
706                 ch = (code->tables + fcc_offset)[ch];
707               }
708             forcecase = forcecasereset;
709             }
710 
711 #ifdef SUPPORT_UNICODE
712           if (utf) chlen = PRIV(ord2utf)(ch, temp); else
713 #endif
714             {
715             temp[0] = ch;
716             chlen = 1;
717             }
718           CHECKMEMCPY(temp, chlen);
719           }
720         }
721       }
722 
723     /* Handle an escape sequence in extended mode. We can use check_escape()
724     to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but
725     the case-forcing escapes are not supported in pcre2_compile() so must be
726     recognized here. */
727 
728     else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
729               *ptr == CHAR_BACKSLASH)
730       {
731       int errorcode;
732 
733       if (ptr < repend - 1) switch (ptr[1])
734         {
735         case CHAR_L:
736         forcecase = forcecasereset = -1;
737         ptr += 2;
738         continue;
739 
740         case CHAR_l:
741         forcecase = -1;
742         forcecasereset = 0;
743         ptr += 2;
744         continue;
745 
746         case CHAR_U:
747         forcecase = forcecasereset = 1;
748         ptr += 2;
749         continue;
750 
751         case CHAR_u:
752         forcecase = 1;
753         forcecasereset = 0;
754         ptr += 2;
755         continue;
756 
757         default:
758         break;
759         }
760 
761       ptr++;  /* Point after \ */
762       rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
763         code->overall_options, FALSE, NULL);
764       if (errorcode != 0) goto BADESCAPE;
765 
766       switch(rc)
767         {
768         case ESC_E:
769         forcecase = forcecasereset = 0;
770         continue;
771 
772         case ESC_Q:
773         literal = TRUE;
774         continue;
775 
776         case 0:      /* Data character */
777         goto LITERAL;
778 
779         default:
780         goto BADESCAPE;
781         }
782       }
783 
784     /* Handle a literal code unit */
785 
786     else
787       {
788       LOADLITERAL:
789       GETCHARINCTEST(ch, ptr);    /* Get character value, increment pointer */
790 
791       LITERAL:
792       if (forcecase != 0)
793         {
794 #ifdef SUPPORT_UNICODE
795         if (utf)
796           {
797           uint32_t type = UCD_CHARTYPE(ch);
798           if (PRIV(ucp_gentype)[type] == ucp_L &&
799               type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
800             ch = UCD_OTHERCASE(ch);
801           }
802         else
803 #endif
804           {
805           if (((code->tables + cbits_offset +
806               ((forcecase > 0)? cbit_upper:cbit_lower)
807               )[ch/8] & (1 << (ch%8))) == 0)
808             ch = (code->tables + fcc_offset)[ch];
809           }
810         forcecase = forcecasereset;
811         }
812 
813 #ifdef SUPPORT_UNICODE
814       if (utf) chlen = PRIV(ord2utf)(ch, temp); else
815 #endif
816         {
817         temp[0] = ch;
818         chlen = 1;
819         }
820       CHECKMEMCPY(temp, chlen);
821       } /* End handling a literal code unit */
822     }   /* End of loop for scanning the replacement. */
823 
824   /* The replacement has been copied to the output. Save the details of this
825   match. See above for how this data is used. If we matched an empty string, do
826   the magic for global matches. Finally, update the start offset to point to
827   the rest of the subject string. */
828 
829   ovecsave[0] = ovector[0];
830   ovecsave[1] = ovector[1];
831   ovecsave[2] = start_offset;
832 
833   goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 :
834     PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
835   start_offset = ovector[1];
836   } while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0);  /* Repeat "do" loop */
837 
838 /* Copy the rest of the subject. */
839 
840 fraglength = length - start_offset;
841 CHECKMEMCPY(subject + start_offset, fraglength);
842 temp[0] = 0;
843 CHECKMEMCPY(temp , 1);
844 
845 /* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,
846 and matching has carried on after a full buffer, in order to compute the length
847 needed. Otherwise, an overflow generates an immediate error return. */
848 
849 if (overflowed)
850   {
851   rc = PCRE2_ERROR_NOMEMORY;
852   *blength = buff_length + extra_needed;
853   }
854 
855 /* After a successful execution, return the number of substitutions and set the
856 length of buffer used, excluding the trailing zero. */
857 
858 else
859   {
860   rc = subs;
861   *blength = buff_offset - 1;
862   }
863 
864 EXIT:
865 if (match_data_created) pcre2_match_data_free(match_data);
866   else match_data->rc = rc;
867 return rc;
868 
869 NOROOM:
870 rc = PCRE2_ERROR_NOMEMORY;
871 goto EXIT;
872 
873 BAD:
874 rc = PCRE2_ERROR_BADREPLACEMENT;
875 goto PTREXIT;
876 
877 BADESCAPE:
878 rc = PCRE2_ERROR_BADREPESCAPE;
879 
880 PTREXIT:
881 *blength = (PCRE2_SIZE)(ptr - replacement);
882 goto EXIT;
883 }
884 
885 /* End of pcre2_substitute.c */
886