1 #if STRINGLIB_IS_UNICODE
2 # error "transmogrify.h only compatible with byte-wise strings"
3 #endif
4
/* The more complicated methods.  Parts of these should be pulled out into
   the shared code in bytes_methods.c to cut down on duplicate code bloat. */
7
8 /*[clinic input]
9 class B "PyObject *" "&PyType_Type"
10 [clinic start generated code]*/
11 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2935558188d97c76]*/
12
13 #include "clinic/transmogrify.h.h"
14
15 static inline PyObject *
return_self(PyObject * self)16 return_self(PyObject *self)
17 {
18 #if !STRINGLIB_MUTABLE
19 if (STRINGLIB_CHECK_EXACT(self)) {
20 Py_INCREF(self);
21 return self;
22 }
23 #endif
24 return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
25 }
26
27 /*[clinic input]
28 B.expandtabs as stringlib_expandtabs
29
30 tabsize: int = 8
31
32 Return a copy where all tab characters are expanded using spaces.
33
34 If tabsize is not given, a tab size of 8 characters is assumed.
35 [clinic start generated code]*/
36
37 static PyObject *
stringlib_expandtabs_impl(PyObject * self,int tabsize)38 stringlib_expandtabs_impl(PyObject *self, int tabsize)
39 /*[clinic end generated code: output=069cb7fae72e4c2b input=3c6d3b12aa3ccbea]*/
40 {
41 const char *e, *p;
42 char *q;
43 Py_ssize_t i, j;
44 PyObject *u;
45
46 /* First pass: determine size of output string */
47 i = j = 0;
48 e = STRINGLIB_STR(self) + STRINGLIB_LEN(self);
49 for (p = STRINGLIB_STR(self); p < e; p++) {
50 if (*p == '\t') {
51 if (tabsize > 0) {
52 Py_ssize_t incr = tabsize - (j % tabsize);
53 if (j > PY_SSIZE_T_MAX - incr)
54 goto overflow;
55 j += incr;
56 }
57 }
58 else {
59 if (j > PY_SSIZE_T_MAX - 1)
60 goto overflow;
61 j++;
62 if (*p == '\n' || *p == '\r') {
63 if (i > PY_SSIZE_T_MAX - j)
64 goto overflow;
65 i += j;
66 j = 0;
67 }
68 }
69 }
70
71 if (i > PY_SSIZE_T_MAX - j)
72 goto overflow;
73
74 /* Second pass: create output string and fill it */
75 u = STRINGLIB_NEW(NULL, i + j);
76 if (!u)
77 return NULL;
78
79 j = 0;
80 q = STRINGLIB_STR(u);
81
82 for (p = STRINGLIB_STR(self); p < e; p++) {
83 if (*p == '\t') {
84 if (tabsize > 0) {
85 i = tabsize - (j % tabsize);
86 j += i;
87 while (i--)
88 *q++ = ' ';
89 }
90 }
91 else {
92 j++;
93 *q++ = *p;
94 if (*p == '\n' || *p == '\r')
95 j = 0;
96 }
97 }
98
99 return u;
100 overflow:
101 PyErr_SetString(PyExc_OverflowError, "result too long");
102 return NULL;
103 }
104
105 static inline PyObject *
pad(PyObject * self,Py_ssize_t left,Py_ssize_t right,char fill)106 pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
107 {
108 PyObject *u;
109
110 if (left < 0)
111 left = 0;
112 if (right < 0)
113 right = 0;
114
115 if (left == 0 && right == 0) {
116 return return_self(self);
117 }
118
119 u = STRINGLIB_NEW(NULL, left + STRINGLIB_LEN(self) + right);
120 if (u) {
121 if (left)
122 memset(STRINGLIB_STR(u), fill, left);
123 memcpy(STRINGLIB_STR(u) + left,
124 STRINGLIB_STR(self),
125 STRINGLIB_LEN(self));
126 if (right)
127 memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self),
128 fill, right);
129 }
130
131 return u;
132 }
133
134 /*[clinic input]
135 B.ljust as stringlib_ljust
136
137 width: Py_ssize_t
138 fillchar: char = b' '
139 /
140
141 Return a left-justified string of length width.
142
143 Padding is done using the specified fill character.
144 [clinic start generated code]*/
145
146 static PyObject *
stringlib_ljust_impl(PyObject * self,Py_ssize_t width,char fillchar)147 stringlib_ljust_impl(PyObject *self, Py_ssize_t width, char fillchar)
148 /*[clinic end generated code: output=c79ca173c5ff8337 input=eff2d014bc7d80df]*/
149 {
150 if (STRINGLIB_LEN(self) >= width) {
151 return return_self(self);
152 }
153
154 return pad(self, 0, width - STRINGLIB_LEN(self), fillchar);
155 }
156
157
158 /*[clinic input]
159 B.rjust as stringlib_rjust
160
161 width: Py_ssize_t
162 fillchar: char = b' '
163 /
164
165 Return a right-justified string of length width.
166
167 Padding is done using the specified fill character.
168 [clinic start generated code]*/
169
170 static PyObject *
stringlib_rjust_impl(PyObject * self,Py_ssize_t width,char fillchar)171 stringlib_rjust_impl(PyObject *self, Py_ssize_t width, char fillchar)
172 /*[clinic end generated code: output=7df5d728a5439570 input=218b0bd31308955d]*/
173 {
174 if (STRINGLIB_LEN(self) >= width) {
175 return return_self(self);
176 }
177
178 return pad(self, width - STRINGLIB_LEN(self), 0, fillchar);
179 }
180
181
182 /*[clinic input]
183 B.center as stringlib_center
184
185 width: Py_ssize_t
186 fillchar: char = b' '
187 /
188
189 Return a centered string of length width.
190
191 Padding is done using the specified fill character.
192 [clinic start generated code]*/
193
194 static PyObject *
stringlib_center_impl(PyObject * self,Py_ssize_t width,char fillchar)195 stringlib_center_impl(PyObject *self, Py_ssize_t width, char fillchar)
196 /*[clinic end generated code: output=d8da2e055288b4c2 input=3776fd278765d89b]*/
197 {
198 Py_ssize_t marg, left;
199
200 if (STRINGLIB_LEN(self) >= width) {
201 return return_self(self);
202 }
203
204 marg = width - STRINGLIB_LEN(self);
205 left = marg / 2 + (marg & width & 1);
206
207 return pad(self, left, marg - left, fillchar);
208 }
209
210 /*[clinic input]
211 B.zfill as stringlib_zfill
212
213 width: Py_ssize_t
214 /
215
216 Pad a numeric string with zeros on the left, to fill a field of the given width.
217
218 The original string is never truncated.
219 [clinic start generated code]*/
220
221 static PyObject *
stringlib_zfill_impl(PyObject * self,Py_ssize_t width)222 stringlib_zfill_impl(PyObject *self, Py_ssize_t width)
223 /*[clinic end generated code: output=0b3c684a7f1b2319 input=2da6d7b8e9bcb19a]*/
224 {
225 Py_ssize_t fill;
226 PyObject *s;
227 char *p;
228
229 if (STRINGLIB_LEN(self) >= width) {
230 return return_self(self);
231 }
232
233 fill = width - STRINGLIB_LEN(self);
234
235 s = pad(self, fill, 0, '0');
236
237 if (s == NULL)
238 return NULL;
239
240 p = STRINGLIB_STR(s);
241 if (p[fill] == '+' || p[fill] == '-') {
242 /* move sign to beginning of string */
243 p[0] = p[fill];
244 p[fill] = '0';
245 }
246
247 return s;
248 }
249
250
251 /* find and count characters and substrings */
252
/* Search the `target_len` bytes starting at `target` for the byte `c` using
   memchr().  Evaluates to a `char *` pointing at the first match, or NULL
   when there is none.  Note the cast drops any const qualifier that
   `target` carried. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), c, target_len))
255
256
257 static Py_ssize_t
countchar(const char * target,Py_ssize_t target_len,char c,Py_ssize_t maxcount)258 countchar(const char *target, Py_ssize_t target_len, char c,
259 Py_ssize_t maxcount)
260 {
261 Py_ssize_t count = 0;
262 const char *start = target;
263 const char *end = target + target_len;
264
265 while ((start = findchar(start, end - start, c)) != NULL) {
266 count++;
267 if (count >= maxcount)
268 break;
269 start += 1;
270 }
271 return count;
272 }
273
274
275 /* Algorithms for different cases of string replacement */
276
277 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
278 static PyObject *
stringlib_replace_interleave(PyObject * self,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)279 stringlib_replace_interleave(PyObject *self,
280 const char *to_s, Py_ssize_t to_len,
281 Py_ssize_t maxcount)
282 {
283 const char *self_s;
284 char *result_s;
285 Py_ssize_t self_len, result_len;
286 Py_ssize_t count, i;
287 PyObject *result;
288
289 self_len = STRINGLIB_LEN(self);
290
291 /* 1 at the end plus 1 after every character;
292 count = min(maxcount, self_len + 1) */
293 if (maxcount <= self_len) {
294 count = maxcount;
295 }
296 else {
297 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
298 count = self_len + 1;
299 }
300
301 /* Check for overflow */
302 /* result_len = count * to_len + self_len; */
303 assert(count > 0);
304 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
305 PyErr_SetString(PyExc_OverflowError,
306 "replace bytes is too long");
307 return NULL;
308 }
309 result_len = count * to_len + self_len;
310 result = STRINGLIB_NEW(NULL, result_len);
311 if (result == NULL) {
312 return NULL;
313 }
314
315 self_s = STRINGLIB_STR(self);
316 result_s = STRINGLIB_STR(result);
317
318 if (to_len > 1) {
319 /* Lay the first one down (guaranteed this will occur) */
320 memcpy(result_s, to_s, to_len);
321 result_s += to_len;
322 count -= 1;
323
324 for (i = 0; i < count; i++) {
325 *result_s++ = *self_s++;
326 memcpy(result_s, to_s, to_len);
327 result_s += to_len;
328 }
329 }
330 else {
331 result_s[0] = to_s[0];
332 result_s += to_len;
333 count -= 1;
334 for (i = 0; i < count; i++) {
335 *result_s++ = *self_s++;
336 result_s[0] = to_s[0];
337 result_s += to_len;
338 }
339 }
340
341 /* Copy the rest of the original string */
342 memcpy(result_s, self_s, self_len - i);
343
344 return result;
345 }
346
347 /* Special case for deleting a single character */
348 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
349 static PyObject *
stringlib_replace_delete_single_character(PyObject * self,char from_c,Py_ssize_t maxcount)350 stringlib_replace_delete_single_character(PyObject *self,
351 char from_c, Py_ssize_t maxcount)
352 {
353 const char *self_s, *start, *next, *end;
354 char *result_s;
355 Py_ssize_t self_len, result_len;
356 Py_ssize_t count;
357 PyObject *result;
358
359 self_len = STRINGLIB_LEN(self);
360 self_s = STRINGLIB_STR(self);
361
362 count = countchar(self_s, self_len, from_c, maxcount);
363 if (count == 0) {
364 return return_self(self);
365 }
366
367 result_len = self_len - count; /* from_len == 1 */
368 assert(result_len>=0);
369
370 result = STRINGLIB_NEW(NULL, result_len);
371 if (result == NULL) {
372 return NULL;
373 }
374 result_s = STRINGLIB_STR(result);
375
376 start = self_s;
377 end = self_s + self_len;
378 while (count-- > 0) {
379 next = findchar(start, end - start, from_c);
380 if (next == NULL)
381 break;
382 memcpy(result_s, start, next - start);
383 result_s += (next - start);
384 start = next + 1;
385 }
386 memcpy(result_s, start, end - start);
387
388 return result;
389 }
390
391 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
392
393 static PyObject *
stringlib_replace_delete_substring(PyObject * self,const char * from_s,Py_ssize_t from_len,Py_ssize_t maxcount)394 stringlib_replace_delete_substring(PyObject *self,
395 const char *from_s, Py_ssize_t from_len,
396 Py_ssize_t maxcount)
397 {
398 const char *self_s, *start, *next, *end;
399 char *result_s;
400 Py_ssize_t self_len, result_len;
401 Py_ssize_t count, offset;
402 PyObject *result;
403
404 self_len = STRINGLIB_LEN(self);
405 self_s = STRINGLIB_STR(self);
406
407 count = stringlib_count(self_s, self_len,
408 from_s, from_len,
409 maxcount);
410
411 if (count == 0) {
412 /* no matches */
413 return return_self(self);
414 }
415
416 result_len = self_len - (count * from_len);
417 assert (result_len>=0);
418
419 result = STRINGLIB_NEW(NULL, result_len);
420 if (result == NULL) {
421 return NULL;
422 }
423 result_s = STRINGLIB_STR(result);
424
425 start = self_s;
426 end = self_s + self_len;
427 while (count-- > 0) {
428 offset = stringlib_find(start, end - start,
429 from_s, from_len,
430 0);
431 if (offset == -1)
432 break;
433 next = start + offset;
434
435 memcpy(result_s, start, next - start);
436
437 result_s += (next - start);
438 start = next + from_len;
439 }
440 memcpy(result_s, start, end - start);
441 return result;
442 }
443
444 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
445 static PyObject *
stringlib_replace_single_character_in_place(PyObject * self,char from_c,char to_c,Py_ssize_t maxcount)446 stringlib_replace_single_character_in_place(PyObject *self,
447 char from_c, char to_c,
448 Py_ssize_t maxcount)
449 {
450 const char *self_s, *end;
451 char *result_s, *start, *next;
452 Py_ssize_t self_len;
453 PyObject *result;
454
455 /* The result string will be the same size */
456 self_s = STRINGLIB_STR(self);
457 self_len = STRINGLIB_LEN(self);
458
459 next = findchar(self_s, self_len, from_c);
460
461 if (next == NULL) {
462 /* No matches; return the original bytes */
463 return return_self(self);
464 }
465
466 /* Need to make a new bytes */
467 result = STRINGLIB_NEW(NULL, self_len);
468 if (result == NULL) {
469 return NULL;
470 }
471 result_s = STRINGLIB_STR(result);
472 memcpy(result_s, self_s, self_len);
473
474 /* change everything in-place, starting with this one */
475 start = result_s + (next - self_s);
476 *start = to_c;
477 start++;
478 end = result_s + self_len;
479
480 while (--maxcount > 0) {
481 next = findchar(start, end - start, from_c);
482 if (next == NULL)
483 break;
484 *next = to_c;
485 start = next + 1;
486 }
487
488 return result;
489 }
490
491 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
492 static PyObject *
stringlib_replace_substring_in_place(PyObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)493 stringlib_replace_substring_in_place(PyObject *self,
494 const char *from_s, Py_ssize_t from_len,
495 const char *to_s, Py_ssize_t to_len,
496 Py_ssize_t maxcount)
497 {
498 const char *self_s, *end;
499 char *result_s, *start;
500 Py_ssize_t self_len, offset;
501 PyObject *result;
502
503 /* The result bytes will be the same size */
504
505 self_s = STRINGLIB_STR(self);
506 self_len = STRINGLIB_LEN(self);
507
508 offset = stringlib_find(self_s, self_len,
509 from_s, from_len,
510 0);
511 if (offset == -1) {
512 /* No matches; return the original bytes */
513 return return_self(self);
514 }
515
516 /* Need to make a new bytes */
517 result = STRINGLIB_NEW(NULL, self_len);
518 if (result == NULL) {
519 return NULL;
520 }
521 result_s = STRINGLIB_STR(result);
522 memcpy(result_s, self_s, self_len);
523
524 /* change everything in-place, starting with this one */
525 start = result_s + offset;
526 memcpy(start, to_s, from_len);
527 start += from_len;
528 end = result_s + self_len;
529
530 while ( --maxcount > 0) {
531 offset = stringlib_find(start, end - start,
532 from_s, from_len,
533 0);
534 if (offset == -1)
535 break;
536 memcpy(start + offset, to_s, from_len);
537 start += offset + from_len;
538 }
539
540 return result;
541 }
542
543 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
544 static PyObject *
stringlib_replace_single_character(PyObject * self,char from_c,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)545 stringlib_replace_single_character(PyObject *self,
546 char from_c,
547 const char *to_s, Py_ssize_t to_len,
548 Py_ssize_t maxcount)
549 {
550 const char *self_s, *start, *next, *end;
551 char *result_s;
552 Py_ssize_t self_len, result_len;
553 Py_ssize_t count;
554 PyObject *result;
555
556 self_s = STRINGLIB_STR(self);
557 self_len = STRINGLIB_LEN(self);
558
559 count = countchar(self_s, self_len, from_c, maxcount);
560 if (count == 0) {
561 /* no matches, return unchanged */
562 return return_self(self);
563 }
564
565 /* use the difference between current and new, hence the "-1" */
566 /* result_len = self_len + count * (to_len-1) */
567 assert(count > 0);
568 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
569 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
570 return NULL;
571 }
572 result_len = self_len + count * (to_len - 1);
573
574 result = STRINGLIB_NEW(NULL, result_len);
575 if (result == NULL) {
576 return NULL;
577 }
578 result_s = STRINGLIB_STR(result);
579
580 start = self_s;
581 end = self_s + self_len;
582 while (count-- > 0) {
583 next = findchar(start, end - start, from_c);
584 if (next == NULL)
585 break;
586
587 if (next == start) {
588 /* replace with the 'to' */
589 memcpy(result_s, to_s, to_len);
590 result_s += to_len;
591 start += 1;
592 } else {
593 /* copy the unchanged old then the 'to' */
594 memcpy(result_s, start, next - start);
595 result_s += (next - start);
596 memcpy(result_s, to_s, to_len);
597 result_s += to_len;
598 start = next + 1;
599 }
600 }
601 /* Copy the remainder of the remaining bytes */
602 memcpy(result_s, start, end - start);
603
604 return result;
605 }
606
607 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
608 static PyObject *
stringlib_replace_substring(PyObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)609 stringlib_replace_substring(PyObject *self,
610 const char *from_s, Py_ssize_t from_len,
611 const char *to_s, Py_ssize_t to_len,
612 Py_ssize_t maxcount)
613 {
614 const char *self_s, *start, *next, *end;
615 char *result_s;
616 Py_ssize_t self_len, result_len;
617 Py_ssize_t count, offset;
618 PyObject *result;
619
620 self_s = STRINGLIB_STR(self);
621 self_len = STRINGLIB_LEN(self);
622
623 count = stringlib_count(self_s, self_len,
624 from_s, from_len,
625 maxcount);
626
627 if (count == 0) {
628 /* no matches, return unchanged */
629 return return_self(self);
630 }
631
632 /* Check for overflow */
633 /* result_len = self_len + count * (to_len-from_len) */
634 assert(count > 0);
635 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
636 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
637 return NULL;
638 }
639 result_len = self_len + count * (to_len - from_len);
640
641 result = STRINGLIB_NEW(NULL, result_len);
642 if (result == NULL) {
643 return NULL;
644 }
645 result_s = STRINGLIB_STR(result);
646
647 start = self_s;
648 end = self_s + self_len;
649 while (count-- > 0) {
650 offset = stringlib_find(start, end - start,
651 from_s, from_len,
652 0);
653 if (offset == -1)
654 break;
655 next = start + offset;
656 if (next == start) {
657 /* replace with the 'to' */
658 memcpy(result_s, to_s, to_len);
659 result_s += to_len;
660 start += from_len;
661 } else {
662 /* copy the unchanged old then the 'to' */
663 memcpy(result_s, start, next - start);
664 result_s += (next - start);
665 memcpy(result_s, to_s, to_len);
666 result_s += to_len;
667 start = next + from_len;
668 }
669 }
670 /* Copy the remainder of the remaining bytes */
671 memcpy(result_s, start, end - start);
672
673 return result;
674 }
675
676
677 static PyObject *
stringlib_replace(PyObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)678 stringlib_replace(PyObject *self,
679 const char *from_s, Py_ssize_t from_len,
680 const char *to_s, Py_ssize_t to_len,
681 Py_ssize_t maxcount)
682 {
683 if (STRINGLIB_LEN(self) < from_len) {
684 /* nothing to do; return the original bytes */
685 return return_self(self);
686 }
687 if (maxcount < 0) {
688 maxcount = PY_SSIZE_T_MAX;
689 } else if (maxcount == 0) {
690 /* nothing to do; return the original bytes */
691 return return_self(self);
692 }
693
694 /* Handle zero-length special cases */
695 if (from_len == 0) {
696 if (to_len == 0) {
697 /* nothing to do; return the original bytes */
698 return return_self(self);
699 }
700 /* insert the 'to' bytes everywhere. */
701 /* >>> b"Python".replace(b"", b".") */
702 /* b'.P.y.t.h.o.n.' */
703 return stringlib_replace_interleave(self, to_s, to_len, maxcount);
704 }
705
706 if (to_len == 0) {
707 /* delete all occurrences of 'from' bytes */
708 if (from_len == 1) {
709 return stringlib_replace_delete_single_character(
710 self, from_s[0], maxcount);
711 } else {
712 return stringlib_replace_delete_substring(
713 self, from_s, from_len, maxcount);
714 }
715 }
716
717 /* Handle special case where both bytes have the same length */
718
719 if (from_len == to_len) {
720 if (from_len == 1) {
721 return stringlib_replace_single_character_in_place(
722 self, from_s[0], to_s[0], maxcount);
723 } else {
724 return stringlib_replace_substring_in_place(
725 self, from_s, from_len, to_s, to_len, maxcount);
726 }
727 }
728
729 /* Otherwise use the more generic algorithms */
730 if (from_len == 1) {
731 return stringlib_replace_single_character(
732 self, from_s[0], to_s, to_len, maxcount);
733 } else {
734 /* len('from')>=2, len('to')>=1 */
735 return stringlib_replace_substring(
736 self, from_s, from_len, to_s, to_len, maxcount);
737 }
738 }
739
740 #undef findchar
741