• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/* Refuse to build when the including file configured stringlib for
   unicode: everything below works on byte buffers only. */
#if STRINGLIB_IS_UNICODE
#  error "transmogrify.h only compatible with byte-wise strings"
#endif
4 
5 /* the more complicated methods.  parts of these should be pulled out into the
6    shared code in bytes_methods.c to cut down on duplicate code bloat.  */
7 
8 /*[clinic input]
9 class B "PyObject *" "&PyType_Type"
10 [clinic start generated code]*/
11 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2935558188d97c76]*/
12 
13 #include "clinic/transmogrify.h.h"
14 
15 static inline PyObject *
return_self(PyObject * self)16 return_self(PyObject *self)
17 {
18 #if !STRINGLIB_MUTABLE
19     if (STRINGLIB_CHECK_EXACT(self)) {
20         return Py_NewRef(self);
21     }
22 #endif
23     return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
24 }
25 
26 /*[clinic input]
27 B.expandtabs as stringlib_expandtabs
28 
29     tabsize: int = 8
30 
31 Return a copy where all tab characters are expanded using spaces.
32 
33 If tabsize is not given, a tab size of 8 characters is assumed.
34 [clinic start generated code]*/
35 
36 static PyObject *
stringlib_expandtabs_impl(PyObject * self,int tabsize)37 stringlib_expandtabs_impl(PyObject *self, int tabsize)
38 /*[clinic end generated code: output=069cb7fae72e4c2b input=3c6d3b12aa3ccbea]*/
39 {
40     const char *e, *p;
41     char *q;
42     Py_ssize_t i, j;
43     PyObject *u;
44 
45     /* First pass: determine size of output string */
46     i = j = 0;
47     e = STRINGLIB_STR(self) + STRINGLIB_LEN(self);
48     for (p = STRINGLIB_STR(self); p < e; p++) {
49         if (*p == '\t') {
50             if (tabsize > 0) {
51                 Py_ssize_t incr = tabsize - (j % tabsize);
52                 if (j > PY_SSIZE_T_MAX - incr)
53                     goto overflow;
54                 j += incr;
55             }
56         }
57         else {
58             if (j > PY_SSIZE_T_MAX - 1)
59                 goto overflow;
60             j++;
61             if (*p == '\n' || *p == '\r') {
62                 if (i > PY_SSIZE_T_MAX - j)
63                     goto overflow;
64                 i += j;
65                 j = 0;
66             }
67         }
68     }
69 
70     if (i > PY_SSIZE_T_MAX - j)
71         goto overflow;
72 
73     /* Second pass: create output string and fill it */
74     u = STRINGLIB_NEW(NULL, i + j);
75     if (!u)
76         return NULL;
77 
78     j = 0;
79     q = STRINGLIB_STR(u);
80 
81     for (p = STRINGLIB_STR(self); p < e; p++) {
82         if (*p == '\t') {
83             if (tabsize > 0) {
84                 i = tabsize - (j % tabsize);
85                 j += i;
86                 while (i--)
87                     *q++ = ' ';
88             }
89         }
90         else {
91             j++;
92             *q++ = *p;
93             if (*p == '\n' || *p == '\r')
94                 j = 0;
95         }
96     }
97 
98     return u;
99   overflow:
100     PyErr_SetString(PyExc_OverflowError, "result too long");
101     return NULL;
102 }
103 
104 static inline PyObject *
pad(PyObject * self,Py_ssize_t left,Py_ssize_t right,char fill)105 pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
106 {
107     PyObject *u;
108 
109     if (left < 0)
110         left = 0;
111     if (right < 0)
112         right = 0;
113 
114     if (left == 0 && right == 0) {
115         return return_self(self);
116     }
117 
118     u = STRINGLIB_NEW(NULL, left + STRINGLIB_LEN(self) + right);
119     if (u) {
120         if (left)
121             memset(STRINGLIB_STR(u), fill, left);
122         memcpy(STRINGLIB_STR(u) + left,
123                STRINGLIB_STR(self),
124                STRINGLIB_LEN(self));
125         if (right)
126             memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self),
127                    fill, right);
128     }
129 
130     return u;
131 }
132 
133 /*[clinic input]
134 B.ljust as stringlib_ljust
135 
136     width: Py_ssize_t
137     fillchar: char = b' '
138     /
139 
140 Return a left-justified string of length width.
141 
142 Padding is done using the specified fill character.
143 [clinic start generated code]*/
144 
145 static PyObject *
stringlib_ljust_impl(PyObject * self,Py_ssize_t width,char fillchar)146 stringlib_ljust_impl(PyObject *self, Py_ssize_t width, char fillchar)
147 /*[clinic end generated code: output=c79ca173c5ff8337 input=eff2d014bc7d80df]*/
148 {
149     if (STRINGLIB_LEN(self) >= width) {
150         return return_self(self);
151     }
152 
153     return pad(self, 0, width - STRINGLIB_LEN(self), fillchar);
154 }
155 
156 
157 /*[clinic input]
158 B.rjust as stringlib_rjust
159 
160     width: Py_ssize_t
161     fillchar: char = b' '
162     /
163 
164 Return a right-justified string of length width.
165 
166 Padding is done using the specified fill character.
167 [clinic start generated code]*/
168 
169 static PyObject *
stringlib_rjust_impl(PyObject * self,Py_ssize_t width,char fillchar)170 stringlib_rjust_impl(PyObject *self, Py_ssize_t width, char fillchar)
171 /*[clinic end generated code: output=7df5d728a5439570 input=218b0bd31308955d]*/
172 {
173     if (STRINGLIB_LEN(self) >= width) {
174         return return_self(self);
175     }
176 
177     return pad(self, width - STRINGLIB_LEN(self), 0, fillchar);
178 }
179 
180 
181 /*[clinic input]
182 B.center as stringlib_center
183 
184     width: Py_ssize_t
185     fillchar: char = b' '
186     /
187 
188 Return a centered string of length width.
189 
190 Padding is done using the specified fill character.
191 [clinic start generated code]*/
192 
193 static PyObject *
stringlib_center_impl(PyObject * self,Py_ssize_t width,char fillchar)194 stringlib_center_impl(PyObject *self, Py_ssize_t width, char fillchar)
195 /*[clinic end generated code: output=d8da2e055288b4c2 input=3776fd278765d89b]*/
196 {
197     Py_ssize_t marg, left;
198 
199     if (STRINGLIB_LEN(self) >= width) {
200         return return_self(self);
201     }
202 
203     marg = width - STRINGLIB_LEN(self);
204     left = marg / 2 + (marg & width & 1);
205 
206     return pad(self, left, marg - left, fillchar);
207 }
208 
209 /*[clinic input]
210 B.zfill as stringlib_zfill
211 
212     width: Py_ssize_t
213     /
214 
215 Pad a numeric string with zeros on the left, to fill a field of the given width.
216 
217 The original string is never truncated.
218 [clinic start generated code]*/
219 
220 static PyObject *
stringlib_zfill_impl(PyObject * self,Py_ssize_t width)221 stringlib_zfill_impl(PyObject *self, Py_ssize_t width)
222 /*[clinic end generated code: output=0b3c684a7f1b2319 input=2da6d7b8e9bcb19a]*/
223 {
224     Py_ssize_t fill;
225     PyObject *s;
226     char *p;
227 
228     if (STRINGLIB_LEN(self) >= width) {
229         return return_self(self);
230     }
231 
232     fill = width - STRINGLIB_LEN(self);
233 
234     s = pad(self, fill, 0, '0');
235 
236     if (s == NULL)
237         return NULL;
238 
239     p = STRINGLIB_STR(s);
240     if (p[fill] == '+' || p[fill] == '-') {
241         /* move sign to beginning of string */
242         p[0] = p[fill];
243         p[fill] = '0';
244     }
245 
246     return s;
247 }
248 
249 
250 /* find and count characters and substrings */
251 
/* findchar(target, target_len, c): memchr() over the first target_len bytes
   of target, with the result cast to a writable `char *` (NULL if absent). */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))
254 
255 
256 static Py_ssize_t
countchar(const char * target,Py_ssize_t target_len,char c,Py_ssize_t maxcount)257 countchar(const char *target, Py_ssize_t target_len, char c,
258           Py_ssize_t maxcount)
259 {
260     Py_ssize_t count = 0;
261     const char *start = target;
262     const char *end = target + target_len;
263 
264     while ((start = findchar(start, end - start, c)) != NULL) {
265         count++;
266         if (count >= maxcount)
267             break;
268         start += 1;
269     }
270     return count;
271 }
272 
273 
274 /* Algorithms for different cases of string replacement */
275 
276 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
277 static PyObject *
stringlib_replace_interleave(PyObject * self,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)278 stringlib_replace_interleave(PyObject *self,
279                              const char *to_s, Py_ssize_t to_len,
280                              Py_ssize_t maxcount)
281 {
282     const char *self_s;
283     char *result_s;
284     Py_ssize_t self_len, result_len;
285     Py_ssize_t count, i;
286     PyObject *result;
287 
288     self_len = STRINGLIB_LEN(self);
289 
290     /* 1 at the end plus 1 after every character;
291        count = min(maxcount, self_len + 1) */
292     if (maxcount <= self_len) {
293         count = maxcount;
294     }
295     else {
296         /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
297         count = self_len + 1;
298     }
299 
300     /* Check for overflow */
301     /*   result_len = count * to_len + self_len; */
302     assert(count > 0);
303     if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
304         PyErr_SetString(PyExc_OverflowError,
305                         "replace bytes is too long");
306         return NULL;
307     }
308     result_len = count * to_len + self_len;
309     result = STRINGLIB_NEW(NULL, result_len);
310     if (result == NULL) {
311         return NULL;
312     }
313 
314     self_s = STRINGLIB_STR(self);
315     result_s = STRINGLIB_STR(result);
316 
317     if (to_len > 1) {
318         /* Lay the first one down (guaranteed this will occur) */
319         memcpy(result_s, to_s, to_len);
320         result_s += to_len;
321         count -= 1;
322 
323         for (i = 0; i < count; i++) {
324             *result_s++ = *self_s++;
325             memcpy(result_s, to_s, to_len);
326             result_s += to_len;
327         }
328     }
329     else {
330         result_s[0] = to_s[0];
331         result_s += to_len;
332         count -= 1;
333         for (i = 0; i < count; i++) {
334             *result_s++ = *self_s++;
335             result_s[0] = to_s[0];
336             result_s += to_len;
337         }
338     }
339 
340     /* Copy the rest of the original string */
341     memcpy(result_s, self_s, self_len - i);
342 
343     return result;
344 }
345 
346 /* Special case for deleting a single character */
347 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
348 static PyObject *
stringlib_replace_delete_single_character(PyObject * self,char from_c,Py_ssize_t maxcount)349 stringlib_replace_delete_single_character(PyObject *self,
350                                           char from_c, Py_ssize_t maxcount)
351 {
352     const char *self_s, *start, *next, *end;
353     char *result_s;
354     Py_ssize_t self_len, result_len;
355     Py_ssize_t count;
356     PyObject *result;
357 
358     self_len = STRINGLIB_LEN(self);
359     self_s = STRINGLIB_STR(self);
360 
361     count = countchar(self_s, self_len, from_c, maxcount);
362     if (count == 0) {
363         return return_self(self);
364     }
365 
366     result_len = self_len - count;  /* from_len == 1 */
367     assert(result_len>=0);
368 
369     result = STRINGLIB_NEW(NULL, result_len);
370     if (result == NULL) {
371         return NULL;
372     }
373     result_s = STRINGLIB_STR(result);
374 
375     start = self_s;
376     end = self_s + self_len;
377     while (count-- > 0) {
378         next = findchar(start, end - start, from_c);
379         if (next == NULL)
380             break;
381         memcpy(result_s, start, next - start);
382         result_s += (next - start);
383         start = next + 1;
384     }
385     memcpy(result_s, start, end - start);
386 
387     return result;
388 }
389 
390 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
391 
392 static PyObject *
stringlib_replace_delete_substring(PyObject * self,const char * from_s,Py_ssize_t from_len,Py_ssize_t maxcount)393 stringlib_replace_delete_substring(PyObject *self,
394                                    const char *from_s, Py_ssize_t from_len,
395                                    Py_ssize_t maxcount)
396 {
397     const char *self_s, *start, *next, *end;
398     char *result_s;
399     Py_ssize_t self_len, result_len;
400     Py_ssize_t count, offset;
401     PyObject *result;
402 
403     self_len = STRINGLIB_LEN(self);
404     self_s = STRINGLIB_STR(self);
405 
406     count = stringlib_count(self_s, self_len,
407                             from_s, from_len,
408                             maxcount);
409 
410     if (count == 0) {
411         /* no matches */
412         return return_self(self);
413     }
414 
415     result_len = self_len - (count * from_len);
416     assert (result_len>=0);
417 
418     result = STRINGLIB_NEW(NULL, result_len);
419     if (result == NULL) {
420         return NULL;
421     }
422     result_s = STRINGLIB_STR(result);
423 
424     start = self_s;
425     end = self_s + self_len;
426     while (count-- > 0) {
427         offset = stringlib_find(start, end - start,
428                                 from_s, from_len,
429                                 0);
430         if (offset == -1)
431             break;
432         next = start + offset;
433 
434         memcpy(result_s, start, next - start);
435 
436         result_s += (next - start);
437         start = next + from_len;
438     }
439     memcpy(result_s, start, end - start);
440     return result;
441 }
442 
443 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
444 static PyObject *
stringlib_replace_single_character_in_place(PyObject * self,char from_c,char to_c,Py_ssize_t maxcount)445 stringlib_replace_single_character_in_place(PyObject *self,
446                                             char from_c, char to_c,
447                                             Py_ssize_t maxcount)
448 {
449     const char *self_s, *end;
450     char *result_s, *start, *next;
451     Py_ssize_t self_len;
452     PyObject *result;
453 
454     /* The result string will be the same size */
455     self_s = STRINGLIB_STR(self);
456     self_len = STRINGLIB_LEN(self);
457 
458     next = findchar(self_s, self_len, from_c);
459 
460     if (next == NULL) {
461         /* No matches; return the original bytes */
462         return return_self(self);
463     }
464 
465     /* Need to make a new bytes */
466     result = STRINGLIB_NEW(NULL, self_len);
467     if (result == NULL) {
468         return NULL;
469     }
470     result_s = STRINGLIB_STR(result);
471     memcpy(result_s, self_s, self_len);
472 
473     /* change everything in-place, starting with this one */
474     start =  result_s + (next - self_s);
475     *start = to_c;
476     start++;
477     end = result_s + self_len;
478 
479     while (--maxcount > 0) {
480         next = findchar(start, end - start, from_c);
481         if (next == NULL)
482             break;
483         *next = to_c;
484         start = next + 1;
485     }
486 
487     return result;
488 }
489 
490 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
491 static PyObject *
stringlib_replace_substring_in_place(PyObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)492 stringlib_replace_substring_in_place(PyObject *self,
493                                      const char *from_s, Py_ssize_t from_len,
494                                      const char *to_s, Py_ssize_t to_len,
495                                      Py_ssize_t maxcount)
496 {
497     const char *self_s, *end;
498     char *result_s, *start;
499     Py_ssize_t self_len, offset;
500     PyObject *result;
501 
502     /* The result bytes will be the same size */
503 
504     self_s = STRINGLIB_STR(self);
505     self_len = STRINGLIB_LEN(self);
506 
507     offset = stringlib_find(self_s, self_len,
508                             from_s, from_len,
509                             0);
510     if (offset == -1) {
511         /* No matches; return the original bytes */
512         return return_self(self);
513     }
514 
515     /* Need to make a new bytes */
516     result = STRINGLIB_NEW(NULL, self_len);
517     if (result == NULL) {
518         return NULL;
519     }
520     result_s = STRINGLIB_STR(result);
521     memcpy(result_s, self_s, self_len);
522 
523     /* change everything in-place, starting with this one */
524     start =  result_s + offset;
525     memcpy(start, to_s, from_len);
526     start += from_len;
527     end = result_s + self_len;
528 
529     while ( --maxcount > 0) {
530         offset = stringlib_find(start, end - start,
531                                 from_s, from_len,
532                                 0);
533         if (offset == -1)
534             break;
535         memcpy(start + offset, to_s, from_len);
536         start += offset + from_len;
537     }
538 
539     return result;
540 }
541 
542 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
543 static PyObject *
stringlib_replace_single_character(PyObject * self,char from_c,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)544 stringlib_replace_single_character(PyObject *self,
545                                    char from_c,
546                                    const char *to_s, Py_ssize_t to_len,
547                                    Py_ssize_t maxcount)
548 {
549     const char *self_s, *start, *next, *end;
550     char *result_s;
551     Py_ssize_t self_len, result_len;
552     Py_ssize_t count;
553     PyObject *result;
554 
555     self_s = STRINGLIB_STR(self);
556     self_len = STRINGLIB_LEN(self);
557 
558     count = countchar(self_s, self_len, from_c, maxcount);
559     if (count == 0) {
560         /* no matches, return unchanged */
561         return return_self(self);
562     }
563 
564     /* use the difference between current and new, hence the "-1" */
565     /*   result_len = self_len + count * (to_len-1)  */
566     assert(count > 0);
567     if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
568         PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
569         return NULL;
570     }
571     result_len = self_len + count * (to_len - 1);
572 
573     result = STRINGLIB_NEW(NULL, result_len);
574     if (result == NULL) {
575         return NULL;
576     }
577     result_s = STRINGLIB_STR(result);
578 
579     start = self_s;
580     end = self_s + self_len;
581     while (count-- > 0) {
582         next = findchar(start, end - start, from_c);
583         if (next == NULL)
584             break;
585 
586         if (next == start) {
587             /* replace with the 'to' */
588             memcpy(result_s, to_s, to_len);
589             result_s += to_len;
590             start += 1;
591         } else {
592             /* copy the unchanged old then the 'to' */
593             memcpy(result_s, start, next - start);
594             result_s += (next - start);
595             memcpy(result_s, to_s, to_len);
596             result_s += to_len;
597             start = next + 1;
598         }
599     }
600     /* Copy the remainder of the remaining bytes */
601     memcpy(result_s, start, end - start);
602 
603     return result;
604 }
605 
606 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
607 static PyObject *
stringlib_replace_substring(PyObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)608 stringlib_replace_substring(PyObject *self,
609                             const char *from_s, Py_ssize_t from_len,
610                             const char *to_s, Py_ssize_t to_len,
611                             Py_ssize_t maxcount)
612 {
613     const char *self_s, *start, *next, *end;
614     char *result_s;
615     Py_ssize_t self_len, result_len;
616     Py_ssize_t count, offset;
617     PyObject *result;
618 
619     self_s = STRINGLIB_STR(self);
620     self_len = STRINGLIB_LEN(self);
621 
622     count = stringlib_count(self_s, self_len,
623                             from_s, from_len,
624                             maxcount);
625 
626     if (count == 0) {
627         /* no matches, return unchanged */
628         return return_self(self);
629     }
630 
631     /* Check for overflow */
632     /*    result_len = self_len + count * (to_len-from_len) */
633     assert(count > 0);
634     if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
635         PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
636         return NULL;
637     }
638     result_len = self_len + count * (to_len - from_len);
639 
640     result = STRINGLIB_NEW(NULL, result_len);
641     if (result == NULL) {
642         return NULL;
643     }
644     result_s = STRINGLIB_STR(result);
645 
646     start = self_s;
647     end = self_s + self_len;
648     while (count-- > 0) {
649         offset = stringlib_find(start, end - start,
650                                 from_s, from_len,
651                                 0);
652         if (offset == -1)
653             break;
654         next = start + offset;
655         if (next == start) {
656             /* replace with the 'to' */
657             memcpy(result_s, to_s, to_len);
658             result_s += to_len;
659             start += from_len;
660         } else {
661             /* copy the unchanged old then the 'to' */
662             memcpy(result_s, start, next - start);
663             result_s += (next - start);
664             memcpy(result_s, to_s, to_len);
665             result_s += to_len;
666             start = next + from_len;
667         }
668     }
669     /* Copy the remainder of the remaining bytes */
670     memcpy(result_s, start, end - start);
671 
672     return result;
673 }
674 
675 
676 static PyObject *
stringlib_replace(PyObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)677 stringlib_replace(PyObject *self,
678                   const char *from_s, Py_ssize_t from_len,
679                   const char *to_s, Py_ssize_t to_len,
680                   Py_ssize_t maxcount)
681 {
682     if (STRINGLIB_LEN(self) < from_len) {
683         /* nothing to do; return the original bytes */
684         return return_self(self);
685     }
686     if (maxcount < 0) {
687         maxcount = PY_SSIZE_T_MAX;
688     } else if (maxcount == 0) {
689         /* nothing to do; return the original bytes */
690         return return_self(self);
691     }
692 
693     /* Handle zero-length special cases */
694     if (from_len == 0) {
695         if (to_len == 0) {
696             /* nothing to do; return the original bytes */
697             return return_self(self);
698         }
699         /* insert the 'to' bytes everywhere.    */
700         /*    >>> b"Python".replace(b"", b".")  */
701         /*    b'.P.y.t.h.o.n.'                  */
702         return stringlib_replace_interleave(self, to_s, to_len, maxcount);
703     }
704 
705     if (to_len == 0) {
706         /* delete all occurrences of 'from' bytes */
707         if (from_len == 1) {
708             return stringlib_replace_delete_single_character(
709                 self, from_s[0], maxcount);
710         } else {
711             return stringlib_replace_delete_substring(
712                 self, from_s, from_len, maxcount);
713         }
714     }
715 
716     /* Handle special case where both bytes have the same length */
717 
718     if (from_len == to_len) {
719         if (from_len == 1) {
720             return stringlib_replace_single_character_in_place(
721                 self, from_s[0], to_s[0], maxcount);
722         } else {
723             return stringlib_replace_substring_in_place(
724                 self, from_s, from_len, to_s, to_len, maxcount);
725         }
726     }
727 
728     /* Otherwise use the more generic algorithms */
729     if (from_len == 1) {
730         return stringlib_replace_single_character(
731             self, from_s[0], to_s, to_len, maxcount);
732     } else {
733         /* len('from')>=2, len('to')>=1 */
734         return stringlib_replace_substring(
735             self, from_s, from_len, to_s, to_len, maxcount);
736     }
737 }
738 
739 #undef findchar
740