1 #if STRINGLIB_IS_UNICODE
2 # error "transmogrify.h only compatible with byte-wise strings"
3 #endif
4
/* The more complicated methods.  Parts of these should be pulled out into the
   shared code in bytes_methods.c to cut down on duplicated code. */
7
8 /*[clinic input]
9 class B "PyObject *" "&PyType_Type"
10 [clinic start generated code]*/
11 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2935558188d97c76]*/
12
13 #include "clinic/transmogrify.h.h"
14
15 static inline PyObject *
return_self(PyObject * self)16 return_self(PyObject *self)
17 {
18 #if !STRINGLIB_MUTABLE
19 if (STRINGLIB_CHECK_EXACT(self)) {
20 return Py_NewRef(self);
21 }
22 #endif
23 return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
24 }
25
26 /*[clinic input]
27 B.expandtabs as stringlib_expandtabs
28
29 tabsize: int = 8
30
31 Return a copy where all tab characters are expanded using spaces.
32
33 If tabsize is not given, a tab size of 8 characters is assumed.
34 [clinic start generated code]*/
35
36 static PyObject *
stringlib_expandtabs_impl(PyObject * self,int tabsize)37 stringlib_expandtabs_impl(PyObject *self, int tabsize)
38 /*[clinic end generated code: output=069cb7fae72e4c2b input=3c6d3b12aa3ccbea]*/
39 {
40 const char *e, *p;
41 char *q;
42 Py_ssize_t i, j;
43 PyObject *u;
44
45 /* First pass: determine size of output string */
46 i = j = 0;
47 e = STRINGLIB_STR(self) + STRINGLIB_LEN(self);
48 for (p = STRINGLIB_STR(self); p < e; p++) {
49 if (*p == '\t') {
50 if (tabsize > 0) {
51 Py_ssize_t incr = tabsize - (j % tabsize);
52 if (j > PY_SSIZE_T_MAX - incr)
53 goto overflow;
54 j += incr;
55 }
56 }
57 else {
58 if (j > PY_SSIZE_T_MAX - 1)
59 goto overflow;
60 j++;
61 if (*p == '\n' || *p == '\r') {
62 if (i > PY_SSIZE_T_MAX - j)
63 goto overflow;
64 i += j;
65 j = 0;
66 }
67 }
68 }
69
70 if (i > PY_SSIZE_T_MAX - j)
71 goto overflow;
72
73 /* Second pass: create output string and fill it */
74 u = STRINGLIB_NEW(NULL, i + j);
75 if (!u)
76 return NULL;
77
78 j = 0;
79 q = STRINGLIB_STR(u);
80
81 for (p = STRINGLIB_STR(self); p < e; p++) {
82 if (*p == '\t') {
83 if (tabsize > 0) {
84 i = tabsize - (j % tabsize);
85 j += i;
86 while (i--)
87 *q++ = ' ';
88 }
89 }
90 else {
91 j++;
92 *q++ = *p;
93 if (*p == '\n' || *p == '\r')
94 j = 0;
95 }
96 }
97
98 return u;
99 overflow:
100 PyErr_SetString(PyExc_OverflowError, "result too long");
101 return NULL;
102 }
103
104 static inline PyObject *
pad(PyObject * self,Py_ssize_t left,Py_ssize_t right,char fill)105 pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
106 {
107 PyObject *u;
108
109 if (left < 0)
110 left = 0;
111 if (right < 0)
112 right = 0;
113
114 if (left == 0 && right == 0) {
115 return return_self(self);
116 }
117
118 u = STRINGLIB_NEW(NULL, left + STRINGLIB_LEN(self) + right);
119 if (u) {
120 if (left)
121 memset(STRINGLIB_STR(u), fill, left);
122 memcpy(STRINGLIB_STR(u) + left,
123 STRINGLIB_STR(self),
124 STRINGLIB_LEN(self));
125 if (right)
126 memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self),
127 fill, right);
128 }
129
130 return u;
131 }
132
133 /*[clinic input]
134 B.ljust as stringlib_ljust
135
136 width: Py_ssize_t
137 fillchar: char = b' '
138 /
139
140 Return a left-justified string of length width.
141
142 Padding is done using the specified fill character.
143 [clinic start generated code]*/
144
145 static PyObject *
stringlib_ljust_impl(PyObject * self,Py_ssize_t width,char fillchar)146 stringlib_ljust_impl(PyObject *self, Py_ssize_t width, char fillchar)
147 /*[clinic end generated code: output=c79ca173c5ff8337 input=eff2d014bc7d80df]*/
148 {
149 if (STRINGLIB_LEN(self) >= width) {
150 return return_self(self);
151 }
152
153 return pad(self, 0, width - STRINGLIB_LEN(self), fillchar);
154 }
155
156
157 /*[clinic input]
158 B.rjust as stringlib_rjust
159
160 width: Py_ssize_t
161 fillchar: char = b' '
162 /
163
164 Return a right-justified string of length width.
165
166 Padding is done using the specified fill character.
167 [clinic start generated code]*/
168
169 static PyObject *
stringlib_rjust_impl(PyObject * self,Py_ssize_t width,char fillchar)170 stringlib_rjust_impl(PyObject *self, Py_ssize_t width, char fillchar)
171 /*[clinic end generated code: output=7df5d728a5439570 input=218b0bd31308955d]*/
172 {
173 if (STRINGLIB_LEN(self) >= width) {
174 return return_self(self);
175 }
176
177 return pad(self, width - STRINGLIB_LEN(self), 0, fillchar);
178 }
179
180
181 /*[clinic input]
182 B.center as stringlib_center
183
184 width: Py_ssize_t
185 fillchar: char = b' '
186 /
187
188 Return a centered string of length width.
189
190 Padding is done using the specified fill character.
191 [clinic start generated code]*/
192
193 static PyObject *
stringlib_center_impl(PyObject * self,Py_ssize_t width,char fillchar)194 stringlib_center_impl(PyObject *self, Py_ssize_t width, char fillchar)
195 /*[clinic end generated code: output=d8da2e055288b4c2 input=3776fd278765d89b]*/
196 {
197 Py_ssize_t marg, left;
198
199 if (STRINGLIB_LEN(self) >= width) {
200 return return_self(self);
201 }
202
203 marg = width - STRINGLIB_LEN(self);
204 left = marg / 2 + (marg & width & 1);
205
206 return pad(self, left, marg - left, fillchar);
207 }
208
209 /*[clinic input]
210 B.zfill as stringlib_zfill
211
212 width: Py_ssize_t
213 /
214
215 Pad a numeric string with zeros on the left, to fill a field of the given width.
216
217 The original string is never truncated.
218 [clinic start generated code]*/
219
220 static PyObject *
stringlib_zfill_impl(PyObject * self,Py_ssize_t width)221 stringlib_zfill_impl(PyObject *self, Py_ssize_t width)
222 /*[clinic end generated code: output=0b3c684a7f1b2319 input=2da6d7b8e9bcb19a]*/
223 {
224 Py_ssize_t fill;
225 PyObject *s;
226 char *p;
227
228 if (STRINGLIB_LEN(self) >= width) {
229 return return_self(self);
230 }
231
232 fill = width - STRINGLIB_LEN(self);
233
234 s = pad(self, fill, 0, '0');
235
236 if (s == NULL)
237 return NULL;
238
239 p = STRINGLIB_STR(s);
240 if (p[fill] == '+' || p[fill] == '-') {
241 /* move sign to beginning of string */
242 p[0] = p[fill];
243 p[fill] = '0';
244 }
245
246 return s;
247 }
248
249
250 /* find and count characters and substrings */
251
/* Locate the first byte equal to `c` within the `target_len` bytes starting
   at `target`.  Expands to memchr() with the result cast to (char *), so it
   yields NULL when no such byte is found. */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))
254
255
256 static Py_ssize_t
countchar(const char * target,Py_ssize_t target_len,char c,Py_ssize_t maxcount)257 countchar(const char *target, Py_ssize_t target_len, char c,
258 Py_ssize_t maxcount)
259 {
260 Py_ssize_t count = 0;
261 const char *start = target;
262 const char *end = target + target_len;
263
264 while ((start = findchar(start, end - start, c)) != NULL) {
265 count++;
266 if (count >= maxcount)
267 break;
268 start += 1;
269 }
270 return count;
271 }
272
273
274 /* Algorithms for different cases of string replacement */
275
276 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
277 static PyObject *
stringlib_replace_interleave(PyObject * self,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)278 stringlib_replace_interleave(PyObject *self,
279 const char *to_s, Py_ssize_t to_len,
280 Py_ssize_t maxcount)
281 {
282 const char *self_s;
283 char *result_s;
284 Py_ssize_t self_len, result_len;
285 Py_ssize_t count, i;
286 PyObject *result;
287
288 self_len = STRINGLIB_LEN(self);
289
290 /* 1 at the end plus 1 after every character;
291 count = min(maxcount, self_len + 1) */
292 if (maxcount <= self_len) {
293 count = maxcount;
294 }
295 else {
296 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
297 count = self_len + 1;
298 }
299
300 /* Check for overflow */
301 /* result_len = count * to_len + self_len; */
302 assert(count > 0);
303 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
304 PyErr_SetString(PyExc_OverflowError,
305 "replace bytes is too long");
306 return NULL;
307 }
308 result_len = count * to_len + self_len;
309 result = STRINGLIB_NEW(NULL, result_len);
310 if (result == NULL) {
311 return NULL;
312 }
313
314 self_s = STRINGLIB_STR(self);
315 result_s = STRINGLIB_STR(result);
316
317 if (to_len > 1) {
318 /* Lay the first one down (guaranteed this will occur) */
319 memcpy(result_s, to_s, to_len);
320 result_s += to_len;
321 count -= 1;
322
323 for (i = 0; i < count; i++) {
324 *result_s++ = *self_s++;
325 memcpy(result_s, to_s, to_len);
326 result_s += to_len;
327 }
328 }
329 else {
330 result_s[0] = to_s[0];
331 result_s += to_len;
332 count -= 1;
333 for (i = 0; i < count; i++) {
334 *result_s++ = *self_s++;
335 result_s[0] = to_s[0];
336 result_s += to_len;
337 }
338 }
339
340 /* Copy the rest of the original string */
341 memcpy(result_s, self_s, self_len - i);
342
343 return result;
344 }
345
346 /* Special case for deleting a single character */
347 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
348 static PyObject *
stringlib_replace_delete_single_character(PyObject * self,char from_c,Py_ssize_t maxcount)349 stringlib_replace_delete_single_character(PyObject *self,
350 char from_c, Py_ssize_t maxcount)
351 {
352 const char *self_s, *start, *next, *end;
353 char *result_s;
354 Py_ssize_t self_len, result_len;
355 Py_ssize_t count;
356 PyObject *result;
357
358 self_len = STRINGLIB_LEN(self);
359 self_s = STRINGLIB_STR(self);
360
361 count = countchar(self_s, self_len, from_c, maxcount);
362 if (count == 0) {
363 return return_self(self);
364 }
365
366 result_len = self_len - count; /* from_len == 1 */
367 assert(result_len>=0);
368
369 result = STRINGLIB_NEW(NULL, result_len);
370 if (result == NULL) {
371 return NULL;
372 }
373 result_s = STRINGLIB_STR(result);
374
375 start = self_s;
376 end = self_s + self_len;
377 while (count-- > 0) {
378 next = findchar(start, end - start, from_c);
379 if (next == NULL)
380 break;
381 memcpy(result_s, start, next - start);
382 result_s += (next - start);
383 start = next + 1;
384 }
385 memcpy(result_s, start, end - start);
386
387 return result;
388 }
389
390 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
391
392 static PyObject *
stringlib_replace_delete_substring(PyObject * self,const char * from_s,Py_ssize_t from_len,Py_ssize_t maxcount)393 stringlib_replace_delete_substring(PyObject *self,
394 const char *from_s, Py_ssize_t from_len,
395 Py_ssize_t maxcount)
396 {
397 const char *self_s, *start, *next, *end;
398 char *result_s;
399 Py_ssize_t self_len, result_len;
400 Py_ssize_t count, offset;
401 PyObject *result;
402
403 self_len = STRINGLIB_LEN(self);
404 self_s = STRINGLIB_STR(self);
405
406 count = stringlib_count(self_s, self_len,
407 from_s, from_len,
408 maxcount);
409
410 if (count == 0) {
411 /* no matches */
412 return return_self(self);
413 }
414
415 result_len = self_len - (count * from_len);
416 assert (result_len>=0);
417
418 result = STRINGLIB_NEW(NULL, result_len);
419 if (result == NULL) {
420 return NULL;
421 }
422 result_s = STRINGLIB_STR(result);
423
424 start = self_s;
425 end = self_s + self_len;
426 while (count-- > 0) {
427 offset = stringlib_find(start, end - start,
428 from_s, from_len,
429 0);
430 if (offset == -1)
431 break;
432 next = start + offset;
433
434 memcpy(result_s, start, next - start);
435
436 result_s += (next - start);
437 start = next + from_len;
438 }
439 memcpy(result_s, start, end - start);
440 return result;
441 }
442
443 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
444 static PyObject *
stringlib_replace_single_character_in_place(PyObject * self,char from_c,char to_c,Py_ssize_t maxcount)445 stringlib_replace_single_character_in_place(PyObject *self,
446 char from_c, char to_c,
447 Py_ssize_t maxcount)
448 {
449 const char *self_s, *end;
450 char *result_s, *start, *next;
451 Py_ssize_t self_len;
452 PyObject *result;
453
454 /* The result string will be the same size */
455 self_s = STRINGLIB_STR(self);
456 self_len = STRINGLIB_LEN(self);
457
458 next = findchar(self_s, self_len, from_c);
459
460 if (next == NULL) {
461 /* No matches; return the original bytes */
462 return return_self(self);
463 }
464
465 /* Need to make a new bytes */
466 result = STRINGLIB_NEW(NULL, self_len);
467 if (result == NULL) {
468 return NULL;
469 }
470 result_s = STRINGLIB_STR(result);
471 memcpy(result_s, self_s, self_len);
472
473 /* change everything in-place, starting with this one */
474 start = result_s + (next - self_s);
475 *start = to_c;
476 start++;
477 end = result_s + self_len;
478
479 while (--maxcount > 0) {
480 next = findchar(start, end - start, from_c);
481 if (next == NULL)
482 break;
483 *next = to_c;
484 start = next + 1;
485 }
486
487 return result;
488 }
489
490 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
491 static PyObject *
stringlib_replace_substring_in_place(PyObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)492 stringlib_replace_substring_in_place(PyObject *self,
493 const char *from_s, Py_ssize_t from_len,
494 const char *to_s, Py_ssize_t to_len,
495 Py_ssize_t maxcount)
496 {
497 const char *self_s, *end;
498 char *result_s, *start;
499 Py_ssize_t self_len, offset;
500 PyObject *result;
501
502 /* The result bytes will be the same size */
503
504 self_s = STRINGLIB_STR(self);
505 self_len = STRINGLIB_LEN(self);
506
507 offset = stringlib_find(self_s, self_len,
508 from_s, from_len,
509 0);
510 if (offset == -1) {
511 /* No matches; return the original bytes */
512 return return_self(self);
513 }
514
515 /* Need to make a new bytes */
516 result = STRINGLIB_NEW(NULL, self_len);
517 if (result == NULL) {
518 return NULL;
519 }
520 result_s = STRINGLIB_STR(result);
521 memcpy(result_s, self_s, self_len);
522
523 /* change everything in-place, starting with this one */
524 start = result_s + offset;
525 memcpy(start, to_s, from_len);
526 start += from_len;
527 end = result_s + self_len;
528
529 while ( --maxcount > 0) {
530 offset = stringlib_find(start, end - start,
531 from_s, from_len,
532 0);
533 if (offset == -1)
534 break;
535 memcpy(start + offset, to_s, from_len);
536 start += offset + from_len;
537 }
538
539 return result;
540 }
541
542 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
543 static PyObject *
stringlib_replace_single_character(PyObject * self,char from_c,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)544 stringlib_replace_single_character(PyObject *self,
545 char from_c,
546 const char *to_s, Py_ssize_t to_len,
547 Py_ssize_t maxcount)
548 {
549 const char *self_s, *start, *next, *end;
550 char *result_s;
551 Py_ssize_t self_len, result_len;
552 Py_ssize_t count;
553 PyObject *result;
554
555 self_s = STRINGLIB_STR(self);
556 self_len = STRINGLIB_LEN(self);
557
558 count = countchar(self_s, self_len, from_c, maxcount);
559 if (count == 0) {
560 /* no matches, return unchanged */
561 return return_self(self);
562 }
563
564 /* use the difference between current and new, hence the "-1" */
565 /* result_len = self_len + count * (to_len-1) */
566 assert(count > 0);
567 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
568 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
569 return NULL;
570 }
571 result_len = self_len + count * (to_len - 1);
572
573 result = STRINGLIB_NEW(NULL, result_len);
574 if (result == NULL) {
575 return NULL;
576 }
577 result_s = STRINGLIB_STR(result);
578
579 start = self_s;
580 end = self_s + self_len;
581 while (count-- > 0) {
582 next = findchar(start, end - start, from_c);
583 if (next == NULL)
584 break;
585
586 if (next == start) {
587 /* replace with the 'to' */
588 memcpy(result_s, to_s, to_len);
589 result_s += to_len;
590 start += 1;
591 } else {
592 /* copy the unchanged old then the 'to' */
593 memcpy(result_s, start, next - start);
594 result_s += (next - start);
595 memcpy(result_s, to_s, to_len);
596 result_s += to_len;
597 start = next + 1;
598 }
599 }
600 /* Copy the remainder of the remaining bytes */
601 memcpy(result_s, start, end - start);
602
603 return result;
604 }
605
606 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
607 static PyObject *
stringlib_replace_substring(PyObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)608 stringlib_replace_substring(PyObject *self,
609 const char *from_s, Py_ssize_t from_len,
610 const char *to_s, Py_ssize_t to_len,
611 Py_ssize_t maxcount)
612 {
613 const char *self_s, *start, *next, *end;
614 char *result_s;
615 Py_ssize_t self_len, result_len;
616 Py_ssize_t count, offset;
617 PyObject *result;
618
619 self_s = STRINGLIB_STR(self);
620 self_len = STRINGLIB_LEN(self);
621
622 count = stringlib_count(self_s, self_len,
623 from_s, from_len,
624 maxcount);
625
626 if (count == 0) {
627 /* no matches, return unchanged */
628 return return_self(self);
629 }
630
631 /* Check for overflow */
632 /* result_len = self_len + count * (to_len-from_len) */
633 assert(count > 0);
634 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
635 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
636 return NULL;
637 }
638 result_len = self_len + count * (to_len - from_len);
639
640 result = STRINGLIB_NEW(NULL, result_len);
641 if (result == NULL) {
642 return NULL;
643 }
644 result_s = STRINGLIB_STR(result);
645
646 start = self_s;
647 end = self_s + self_len;
648 while (count-- > 0) {
649 offset = stringlib_find(start, end - start,
650 from_s, from_len,
651 0);
652 if (offset == -1)
653 break;
654 next = start + offset;
655 if (next == start) {
656 /* replace with the 'to' */
657 memcpy(result_s, to_s, to_len);
658 result_s += to_len;
659 start += from_len;
660 } else {
661 /* copy the unchanged old then the 'to' */
662 memcpy(result_s, start, next - start);
663 result_s += (next - start);
664 memcpy(result_s, to_s, to_len);
665 result_s += to_len;
666 start = next + from_len;
667 }
668 }
669 /* Copy the remainder of the remaining bytes */
670 memcpy(result_s, start, end - start);
671
672 return result;
673 }
674
675
676 static PyObject *
stringlib_replace(PyObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)677 stringlib_replace(PyObject *self,
678 const char *from_s, Py_ssize_t from_len,
679 const char *to_s, Py_ssize_t to_len,
680 Py_ssize_t maxcount)
681 {
682 if (STRINGLIB_LEN(self) < from_len) {
683 /* nothing to do; return the original bytes */
684 return return_self(self);
685 }
686 if (maxcount < 0) {
687 maxcount = PY_SSIZE_T_MAX;
688 } else if (maxcount == 0) {
689 /* nothing to do; return the original bytes */
690 return return_self(self);
691 }
692
693 /* Handle zero-length special cases */
694 if (from_len == 0) {
695 if (to_len == 0) {
696 /* nothing to do; return the original bytes */
697 return return_self(self);
698 }
699 /* insert the 'to' bytes everywhere. */
700 /* >>> b"Python".replace(b"", b".") */
701 /* b'.P.y.t.h.o.n.' */
702 return stringlib_replace_interleave(self, to_s, to_len, maxcount);
703 }
704
705 if (to_len == 0) {
706 /* delete all occurrences of 'from' bytes */
707 if (from_len == 1) {
708 return stringlib_replace_delete_single_character(
709 self, from_s[0], maxcount);
710 } else {
711 return stringlib_replace_delete_substring(
712 self, from_s, from_len, maxcount);
713 }
714 }
715
716 /* Handle special case where both bytes have the same length */
717
718 if (from_len == to_len) {
719 if (from_len == 1) {
720 return stringlib_replace_single_character_in_place(
721 self, from_s[0], to_s[0], maxcount);
722 } else {
723 return stringlib_replace_substring_in_place(
724 self, from_s, from_len, to_s, to_len, maxcount);
725 }
726 }
727
728 /* Otherwise use the more generic algorithms */
729 if (from_len == 1) {
730 return stringlib_replace_single_character(
731 self, from_s[0], to_s, to_len, maxcount);
732 } else {
733 /* len('from')>=2, len('to')>=1 */
734 return stringlib_replace_substring(
735 self, from_s, from_len, to_s, to_len, maxcount);
736 }
737 }
738
739 #undef findchar
740