/* This header implements byte-oriented algorithms only; refuse to build when
   the including file has configured stringlib for unicode strings. */
#if STRINGLIB_IS_UNICODE
# error "transmogrify.h only compatible with byte-wise strings"
#endif

/* The more complicated methods.  Parts of these should be pulled out into the
   shared code in bytes_methods.c to cut down on duplicate code bloat.  */

8 static inline PyObject *
return_self(PyObject * self)9 return_self(PyObject *self)
10 {
11 #if !STRINGLIB_MUTABLE
12     if (STRINGLIB_CHECK_EXACT(self)) {
13         Py_INCREF(self);
14         return self;
15     }
16 #endif
17     return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
18 }
19 
20 static PyObject*
stringlib_expandtabs(PyObject * self,PyObject * args,PyObject * kwds)21 stringlib_expandtabs(PyObject *self, PyObject *args, PyObject *kwds)
22 {
23     const char *e, *p;
24     char *q;
25     Py_ssize_t i, j;
26     PyObject *u;
27     static char *kwlist[] = {"tabsize", 0};
28     int tabsize = 8;
29 
30     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:expandtabs",
31                                      kwlist, &tabsize))
32         return NULL;
33 
34     /* First pass: determine size of output string */
35     i = j = 0;
36     e = STRINGLIB_STR(self) + STRINGLIB_LEN(self);
37     for (p = STRINGLIB_STR(self); p < e; p++) {
38         if (*p == '\t') {
39             if (tabsize > 0) {
40                 Py_ssize_t incr = tabsize - (j % tabsize);
41                 if (j > PY_SSIZE_T_MAX - incr)
42                     goto overflow;
43                 j += incr;
44             }
45         }
46         else {
47             if (j > PY_SSIZE_T_MAX - 1)
48                 goto overflow;
49             j++;
50             if (*p == '\n' || *p == '\r') {
51                 if (i > PY_SSIZE_T_MAX - j)
52                     goto overflow;
53                 i += j;
54                 j = 0;
55             }
56         }
57     }
58 
59     if (i > PY_SSIZE_T_MAX - j)
60         goto overflow;
61 
62     /* Second pass: create output string and fill it */
63     u = STRINGLIB_NEW(NULL, i + j);
64     if (!u)
65         return NULL;
66 
67     j = 0;
68     q = STRINGLIB_STR(u);
69 
70     for (p = STRINGLIB_STR(self); p < e; p++) {
71         if (*p == '\t') {
72             if (tabsize > 0) {
73                 i = tabsize - (j % tabsize);
74                 j += i;
75                 while (i--)
76                     *q++ = ' ';
77             }
78         }
79         else {
80             j++;
81             *q++ = *p;
82             if (*p == '\n' || *p == '\r')
83                 j = 0;
84         }
85     }
86 
87     return u;
88   overflow:
89     PyErr_SetString(PyExc_OverflowError, "result too long");
90     return NULL;
91 }
92 
93 static inline PyObject *
pad(PyObject * self,Py_ssize_t left,Py_ssize_t right,char fill)94 pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
95 {
96     PyObject *u;
97 
98     if (left < 0)
99         left = 0;
100     if (right < 0)
101         right = 0;
102 
103     if (left == 0 && right == 0) {
104         return return_self(self);
105     }
106 
107     u = STRINGLIB_NEW(NULL, left + STRINGLIB_LEN(self) + right);
108     if (u) {
109         if (left)
110             memset(STRINGLIB_STR(u), fill, left);
111         memcpy(STRINGLIB_STR(u) + left,
112                STRINGLIB_STR(self),
113                STRINGLIB_LEN(self));
114         if (right)
115             memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self),
116                    fill, right);
117     }
118 
119     return u;
120 }
121 
122 static PyObject *
stringlib_ljust(PyObject * self,PyObject * args)123 stringlib_ljust(PyObject *self, PyObject *args)
124 {
125     Py_ssize_t width;
126     char fillchar = ' ';
127 
128     if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
129         return NULL;
130 
131     if (STRINGLIB_LEN(self) >= width) {
132         return return_self(self);
133     }
134 
135     return pad(self, 0, width - STRINGLIB_LEN(self), fillchar);
136 }
137 
138 
139 static PyObject *
stringlib_rjust(PyObject * self,PyObject * args)140 stringlib_rjust(PyObject *self, PyObject *args)
141 {
142     Py_ssize_t width;
143     char fillchar = ' ';
144 
145     if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
146         return NULL;
147 
148     if (STRINGLIB_LEN(self) >= width) {
149         return return_self(self);
150     }
151 
152     return pad(self, width - STRINGLIB_LEN(self), 0, fillchar);
153 }
154 
155 
156 static PyObject *
stringlib_center(PyObject * self,PyObject * args)157 stringlib_center(PyObject *self, PyObject *args)
158 {
159     Py_ssize_t marg, left;
160     Py_ssize_t width;
161     char fillchar = ' ';
162 
163     if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
164         return NULL;
165 
166     if (STRINGLIB_LEN(self) >= width) {
167         return return_self(self);
168     }
169 
170     marg = width - STRINGLIB_LEN(self);
171     left = marg / 2 + (marg & width & 1);
172 
173     return pad(self, left, marg - left, fillchar);
174 }
175 
176 static PyObject *
stringlib_zfill(PyObject * self,PyObject * args)177 stringlib_zfill(PyObject *self, PyObject *args)
178 {
179     Py_ssize_t fill;
180     PyObject *s;
181     char *p;
182     Py_ssize_t width;
183 
184     if (!PyArg_ParseTuple(args, "n:zfill", &width))
185         return NULL;
186 
187     if (STRINGLIB_LEN(self) >= width) {
188         return return_self(self);
189     }
190 
191     fill = width - STRINGLIB_LEN(self);
192 
193     s = pad(self, fill, 0, '0');
194 
195     if (s == NULL)
196         return NULL;
197 
198     p = STRINGLIB_STR(s);
199     if (p[fill] == '+' || p[fill] == '-') {
200         /* move sign to beginning of string */
201         p[0] = p[fill];
202         p[fill] = '0';
203     }
204 
205     return s;
206 }
207 
208 
/* find and count characters and substrings */

/* Locate the first byte equal to `c` in the `target_len` bytes starting at
   `target`.  Evaluates to a `char *` to that byte, or NULL when there is no
   match.  The result is deliberately non-const so the same macro serves
   both read-only scans and in-place edits of a writable buffer. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))


215 static Py_ssize_t
countchar(const char * target,Py_ssize_t target_len,char c,Py_ssize_t maxcount)216 countchar(const char *target, Py_ssize_t target_len, char c,
217           Py_ssize_t maxcount)
218 {
219     Py_ssize_t count = 0;
220     const char *start = target;
221     const char *end = target + target_len;
222 
223     while ((start = findchar(start, end - start, c)) != NULL) {
224         count++;
225         if (count >= maxcount)
226             break;
227         start += 1;
228     }
229     return count;
230 }
231 
232 
233 /* Algorithms for different cases of string replacement */
234 
235 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
236 static PyObject *
stringlib_replace_interleave(PyObject * self,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)237 stringlib_replace_interleave(PyObject *self,
238                              const char *to_s, Py_ssize_t to_len,
239                              Py_ssize_t maxcount)
240 {
241     const char *self_s;
242     char *result_s;
243     Py_ssize_t self_len, result_len;
244     Py_ssize_t count, i;
245     PyObject *result;
246 
247     self_len = STRINGLIB_LEN(self);
248 
249     /* 1 at the end plus 1 after every character;
250        count = min(maxcount, self_len + 1) */
251     if (maxcount <= self_len) {
252         count = maxcount;
253     }
254     else {
255         /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
256         count = self_len + 1;
257     }
258 
259     /* Check for overflow */
260     /*   result_len = count * to_len + self_len; */
261     assert(count > 0);
262     if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
263         PyErr_SetString(PyExc_OverflowError,
264                         "replace bytes are too long");
265         return NULL;
266     }
267     result_len = count * to_len + self_len;
268     result = STRINGLIB_NEW(NULL, result_len);
269     if (result == NULL) {
270         return NULL;
271     }
272 
273     self_s = STRINGLIB_STR(self);
274     result_s = STRINGLIB_STR(result);
275 
276     if (to_len > 1) {
277         /* Lay the first one down (guaranteed this will occur) */
278         memcpy(result_s, to_s, to_len);
279         result_s += to_len;
280         count -= 1;
281 
282         for (i = 0; i < count; i++) {
283             *result_s++ = *self_s++;
284             memcpy(result_s, to_s, to_len);
285             result_s += to_len;
286         }
287     }
288     else {
289         result_s[0] = to_s[0];
290         result_s += to_len;
291         count -= 1;
292         for (i = 0; i < count; i++) {
293             *result_s++ = *self_s++;
294             result_s[0] = to_s[0];
295             result_s += to_len;
296         }
297     }
298 
299     /* Copy the rest of the original string */
300     memcpy(result_s, self_s, self_len - i);
301 
302     return result;
303 }
304 
305 /* Special case for deleting a single character */
306 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
307 static PyObject *
stringlib_replace_delete_single_character(PyObject * self,char from_c,Py_ssize_t maxcount)308 stringlib_replace_delete_single_character(PyObject *self,
309                                           char from_c, Py_ssize_t maxcount)
310 {
311     const char *self_s, *start, *next, *end;
312     char *result_s;
313     Py_ssize_t self_len, result_len;
314     Py_ssize_t count;
315     PyObject *result;
316 
317     self_len = STRINGLIB_LEN(self);
318     self_s = STRINGLIB_STR(self);
319 
320     count = countchar(self_s, self_len, from_c, maxcount);
321     if (count == 0) {
322         return return_self(self);
323     }
324 
325     result_len = self_len - count;  /* from_len == 1 */
326     assert(result_len>=0);
327 
328     result = STRINGLIB_NEW(NULL, result_len);
329     if (result == NULL) {
330         return NULL;
331     }
332     result_s = STRINGLIB_STR(result);
333 
334     start = self_s;
335     end = self_s + self_len;
336     while (count-- > 0) {
337         next = findchar(start, end - start, from_c);
338         if (next == NULL)
339             break;
340         memcpy(result_s, start, next - start);
341         result_s += (next - start);
342         start = next + 1;
343     }
344     memcpy(result_s, start, end - start);
345 
346     return result;
347 }
348 
349 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
350 
351 static PyObject *
stringlib_replace_delete_substring(PyObject * self,const char * from_s,Py_ssize_t from_len,Py_ssize_t maxcount)352 stringlib_replace_delete_substring(PyObject *self,
353                                    const char *from_s, Py_ssize_t from_len,
354                                    Py_ssize_t maxcount)
355 {
356     const char *self_s, *start, *next, *end;
357     char *result_s;
358     Py_ssize_t self_len, result_len;
359     Py_ssize_t count, offset;
360     PyObject *result;
361 
362     self_len = STRINGLIB_LEN(self);
363     self_s = STRINGLIB_STR(self);
364 
365     count = stringlib_count(self_s, self_len,
366                             from_s, from_len,
367                             maxcount);
368 
369     if (count == 0) {
370         /* no matches */
371         return return_self(self);
372     }
373 
374     result_len = self_len - (count * from_len);
375     assert (result_len>=0);
376 
377     result = STRINGLIB_NEW(NULL, result_len);
378     if (result == NULL) {
379         return NULL;
380     }
381     result_s = STRINGLIB_STR(result);
382 
383     start = self_s;
384     end = self_s + self_len;
385     while (count-- > 0) {
386         offset = stringlib_find(start, end - start,
387                                 from_s, from_len,
388                                 0);
389         if (offset == -1)
390             break;
391         next = start + offset;
392 
393         memcpy(result_s, start, next - start);
394 
395         result_s += (next - start);
396         start = next + from_len;
397     }
398     memcpy(result_s, start, end - start);
399     return result;
400 }
401 
402 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
403 static PyObject *
stringlib_replace_single_character_in_place(PyObject * self,char from_c,char to_c,Py_ssize_t maxcount)404 stringlib_replace_single_character_in_place(PyObject *self,
405                                             char from_c, char to_c,
406                                             Py_ssize_t maxcount)
407 {
408     const char *self_s, *end;
409     char *result_s, *start, *next;
410     Py_ssize_t self_len;
411     PyObject *result;
412 
413     /* The result string will be the same size */
414     self_s = STRINGLIB_STR(self);
415     self_len = STRINGLIB_LEN(self);
416 
417     next = findchar(self_s, self_len, from_c);
418 
419     if (next == NULL) {
420         /* No matches; return the original bytes */
421         return return_self(self);
422     }
423 
424     /* Need to make a new bytes */
425     result = STRINGLIB_NEW(NULL, self_len);
426     if (result == NULL) {
427         return NULL;
428     }
429     result_s = STRINGLIB_STR(result);
430     memcpy(result_s, self_s, self_len);
431 
432     /* change everything in-place, starting with this one */
433     start =  result_s + (next - self_s);
434     *start = to_c;
435     start++;
436     end = result_s + self_len;
437 
438     while (--maxcount > 0) {
439         next = findchar(start, end - start, from_c);
440         if (next == NULL)
441             break;
442         *next = to_c;
443         start = next + 1;
444     }
445 
446     return result;
447 }
448 
449 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
450 static PyObject *
stringlib_replace_substring_in_place(PyObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)451 stringlib_replace_substring_in_place(PyObject *self,
452                                      const char *from_s, Py_ssize_t from_len,
453                                      const char *to_s, Py_ssize_t to_len,
454                                      Py_ssize_t maxcount)
455 {
456     const char *self_s, *end;
457     char *result_s, *start;
458     Py_ssize_t self_len, offset;
459     PyObject *result;
460 
461     /* The result bytes will be the same size */
462 
463     self_s = STRINGLIB_STR(self);
464     self_len = STRINGLIB_LEN(self);
465 
466     offset = stringlib_find(self_s, self_len,
467                             from_s, from_len,
468                             0);
469     if (offset == -1) {
470         /* No matches; return the original bytes */
471         return return_self(self);
472     }
473 
474     /* Need to make a new bytes */
475     result = STRINGLIB_NEW(NULL, self_len);
476     if (result == NULL) {
477         return NULL;
478     }
479     result_s = STRINGLIB_STR(result);
480     memcpy(result_s, self_s, self_len);
481 
482     /* change everything in-place, starting with this one */
483     start =  result_s + offset;
484     memcpy(start, to_s, from_len);
485     start += from_len;
486     end = result_s + self_len;
487 
488     while ( --maxcount > 0) {
489         offset = stringlib_find(start, end - start,
490                                 from_s, from_len,
491                                 0);
492         if (offset == -1)
493             break;
494         memcpy(start + offset, to_s, from_len);
495         start += offset + from_len;
496     }
497 
498     return result;
499 }
500 
501 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
502 static PyObject *
stringlib_replace_single_character(PyObject * self,char from_c,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)503 stringlib_replace_single_character(PyObject *self,
504                                    char from_c,
505                                    const char *to_s, Py_ssize_t to_len,
506                                    Py_ssize_t maxcount)
507 {
508     const char *self_s, *start, *next, *end;
509     char *result_s;
510     Py_ssize_t self_len, result_len;
511     Py_ssize_t count;
512     PyObject *result;
513 
514     self_s = STRINGLIB_STR(self);
515     self_len = STRINGLIB_LEN(self);
516 
517     count = countchar(self_s, self_len, from_c, maxcount);
518     if (count == 0) {
519         /* no matches, return unchanged */
520         return return_self(self);
521     }
522 
523     /* use the difference between current and new, hence the "-1" */
524     /*   result_len = self_len + count * (to_len-1)  */
525     assert(count > 0);
526     if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
527         PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
528         return NULL;
529     }
530     result_len = self_len + count * (to_len - 1);
531 
532     result = STRINGLIB_NEW(NULL, result_len);
533     if (result == NULL) {
534         return NULL;
535     }
536     result_s = STRINGLIB_STR(result);
537 
538     start = self_s;
539     end = self_s + self_len;
540     while (count-- > 0) {
541         next = findchar(start, end - start, from_c);
542         if (next == NULL)
543             break;
544 
545         if (next == start) {
546             /* replace with the 'to' */
547             memcpy(result_s, to_s, to_len);
548             result_s += to_len;
549             start += 1;
550         } else {
551             /* copy the unchanged old then the 'to' */
552             memcpy(result_s, start, next - start);
553             result_s += (next - start);
554             memcpy(result_s, to_s, to_len);
555             result_s += to_len;
556             start = next + 1;
557         }
558     }
559     /* Copy the remainder of the remaining bytes */
560     memcpy(result_s, start, end - start);
561 
562     return result;
563 }
564 
565 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
566 static PyObject *
stringlib_replace_substring(PyObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)567 stringlib_replace_substring(PyObject *self,
568                             const char *from_s, Py_ssize_t from_len,
569                             const char *to_s, Py_ssize_t to_len,
570                             Py_ssize_t maxcount)
571 {
572     const char *self_s, *start, *next, *end;
573     char *result_s;
574     Py_ssize_t self_len, result_len;
575     Py_ssize_t count, offset;
576     PyObject *result;
577 
578     self_s = STRINGLIB_STR(self);
579     self_len = STRINGLIB_LEN(self);
580 
581     count = stringlib_count(self_s, self_len,
582                             from_s, from_len,
583                             maxcount);
584 
585     if (count == 0) {
586         /* no matches, return unchanged */
587         return return_self(self);
588     }
589 
590     /* Check for overflow */
591     /*    result_len = self_len + count * (to_len-from_len) */
592     assert(count > 0);
593     if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
594         PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
595         return NULL;
596     }
597     result_len = self_len + count * (to_len - from_len);
598 
599     result = STRINGLIB_NEW(NULL, result_len);
600     if (result == NULL) {
601         return NULL;
602     }
603     result_s = STRINGLIB_STR(result);
604 
605     start = self_s;
606     end = self_s + self_len;
607     while (count-- > 0) {
608         offset = stringlib_find(start, end - start,
609                                 from_s, from_len,
610                                 0);
611         if (offset == -1)
612             break;
613         next = start + offset;
614         if (next == start) {
615             /* replace with the 'to' */
616             memcpy(result_s, to_s, to_len);
617             result_s += to_len;
618             start += from_len;
619         } else {
620             /* copy the unchanged old then the 'to' */
621             memcpy(result_s, start, next - start);
622             result_s += (next - start);
623             memcpy(result_s, to_s, to_len);
624             result_s += to_len;
625             start = next + from_len;
626         }
627     }
628     /* Copy the remainder of the remaining bytes */
629     memcpy(result_s, start, end - start);
630 
631     return result;
632 }
633 
634 
635 static PyObject *
stringlib_replace(PyObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)636 stringlib_replace(PyObject *self,
637                   const char *from_s, Py_ssize_t from_len,
638                   const char *to_s, Py_ssize_t to_len,
639                   Py_ssize_t maxcount)
640 {
641     if (maxcount < 0) {
642         maxcount = PY_SSIZE_T_MAX;
643     } else if (maxcount == 0 || STRINGLIB_LEN(self) == 0) {
644         /* nothing to do; return the original bytes */
645         return return_self(self);
646     }
647 
648     /* Handle zero-length special cases */
649     if (from_len == 0) {
650         if (to_len == 0) {
651             /* nothing to do; return the original bytes */
652             return return_self(self);
653         }
654         /* insert the 'to' bytes everywhere.    */
655         /*    >>> b"Python".replace(b"", b".")  */
656         /*    b'.P.y.t.h.o.n.'                  */
657         return stringlib_replace_interleave(self, to_s, to_len, maxcount);
658     }
659 
660     /* Except for b"".replace(b"", b"A") == b"A" there is no way beyond this */
661     /* point for an empty self bytes to generate a non-empty bytes */
662     /* Special case so the remaining code always gets a non-empty bytes */
663     if (STRINGLIB_LEN(self) == 0) {
664         return return_self(self);
665     }
666 
667     if (to_len == 0) {
668         /* delete all occurrences of 'from' bytes */
669         if (from_len == 1) {
670             return stringlib_replace_delete_single_character(
671                 self, from_s[0], maxcount);
672         } else {
673             return stringlib_replace_delete_substring(
674                 self, from_s, from_len, maxcount);
675         }
676     }
677 
678     /* Handle special case where both bytes have the same length */
679 
680     if (from_len == to_len) {
681         if (from_len == 1) {
682             return stringlib_replace_single_character_in_place(
683                 self, from_s[0], to_s[0], maxcount);
684         } else {
685             return stringlib_replace_substring_in_place(
686                 self, from_s, from_len, to_s, to_len, maxcount);
687         }
688     }
689 
690     /* Otherwise use the more generic algorithms */
691     if (from_len == 1) {
692         return stringlib_replace_single_character(
693             self, from_s[0], to_s, to_len, maxcount);
694     } else {
695         /* len('from')>=2, len('to')>=1 */
696         return stringlib_replace_substring(
697             self, from_s, from_len, to_s, to_len, maxcount);
698     }
699 }
700 
/* findchar is private to this header; do not leak it to the includer. */
#undef findchar