1 #if STRINGLIB_IS_UNICODE
2 # error "transmogrify.h only compatible with byte-wise strings"
3 #endif
4
/* The more complicated methods.  Parts of these should be pulled out into the
   shared code in bytes_methods.c to cut down on duplicate code bloat. */
7
8 static inline PyObject *
return_self(PyObject * self)9 return_self(PyObject *self)
10 {
11 #if !STRINGLIB_MUTABLE
12 if (STRINGLIB_CHECK_EXACT(self)) {
13 Py_INCREF(self);
14 return self;
15 }
16 #endif
17 return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
18 }
19
20 static PyObject*
stringlib_expandtabs(PyObject * self,PyObject * args,PyObject * kwds)21 stringlib_expandtabs(PyObject *self, PyObject *args, PyObject *kwds)
22 {
23 const char *e, *p;
24 char *q;
25 Py_ssize_t i, j;
26 PyObject *u;
27 static char *kwlist[] = {"tabsize", 0};
28 int tabsize = 8;
29
30 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:expandtabs",
31 kwlist, &tabsize))
32 return NULL;
33
34 /* First pass: determine size of output string */
35 i = j = 0;
36 e = STRINGLIB_STR(self) + STRINGLIB_LEN(self);
37 for (p = STRINGLIB_STR(self); p < e; p++) {
38 if (*p == '\t') {
39 if (tabsize > 0) {
40 Py_ssize_t incr = tabsize - (j % tabsize);
41 if (j > PY_SSIZE_T_MAX - incr)
42 goto overflow;
43 j += incr;
44 }
45 }
46 else {
47 if (j > PY_SSIZE_T_MAX - 1)
48 goto overflow;
49 j++;
50 if (*p == '\n' || *p == '\r') {
51 if (i > PY_SSIZE_T_MAX - j)
52 goto overflow;
53 i += j;
54 j = 0;
55 }
56 }
57 }
58
59 if (i > PY_SSIZE_T_MAX - j)
60 goto overflow;
61
62 /* Second pass: create output string and fill it */
63 u = STRINGLIB_NEW(NULL, i + j);
64 if (!u)
65 return NULL;
66
67 j = 0;
68 q = STRINGLIB_STR(u);
69
70 for (p = STRINGLIB_STR(self); p < e; p++) {
71 if (*p == '\t') {
72 if (tabsize > 0) {
73 i = tabsize - (j % tabsize);
74 j += i;
75 while (i--)
76 *q++ = ' ';
77 }
78 }
79 else {
80 j++;
81 *q++ = *p;
82 if (*p == '\n' || *p == '\r')
83 j = 0;
84 }
85 }
86
87 return u;
88 overflow:
89 PyErr_SetString(PyExc_OverflowError, "result too long");
90 return NULL;
91 }
92
93 static inline PyObject *
pad(PyObject * self,Py_ssize_t left,Py_ssize_t right,char fill)94 pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
95 {
96 PyObject *u;
97
98 if (left < 0)
99 left = 0;
100 if (right < 0)
101 right = 0;
102
103 if (left == 0 && right == 0) {
104 return return_self(self);
105 }
106
107 u = STRINGLIB_NEW(NULL, left + STRINGLIB_LEN(self) + right);
108 if (u) {
109 if (left)
110 memset(STRINGLIB_STR(u), fill, left);
111 memcpy(STRINGLIB_STR(u) + left,
112 STRINGLIB_STR(self),
113 STRINGLIB_LEN(self));
114 if (right)
115 memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self),
116 fill, right);
117 }
118
119 return u;
120 }
121
122 static PyObject *
stringlib_ljust(PyObject * self,PyObject * args)123 stringlib_ljust(PyObject *self, PyObject *args)
124 {
125 Py_ssize_t width;
126 char fillchar = ' ';
127
128 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
129 return NULL;
130
131 if (STRINGLIB_LEN(self) >= width) {
132 return return_self(self);
133 }
134
135 return pad(self, 0, width - STRINGLIB_LEN(self), fillchar);
136 }
137
138
139 static PyObject *
stringlib_rjust(PyObject * self,PyObject * args)140 stringlib_rjust(PyObject *self, PyObject *args)
141 {
142 Py_ssize_t width;
143 char fillchar = ' ';
144
145 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
146 return NULL;
147
148 if (STRINGLIB_LEN(self) >= width) {
149 return return_self(self);
150 }
151
152 return pad(self, width - STRINGLIB_LEN(self), 0, fillchar);
153 }
154
155
156 static PyObject *
stringlib_center(PyObject * self,PyObject * args)157 stringlib_center(PyObject *self, PyObject *args)
158 {
159 Py_ssize_t marg, left;
160 Py_ssize_t width;
161 char fillchar = ' ';
162
163 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
164 return NULL;
165
166 if (STRINGLIB_LEN(self) >= width) {
167 return return_self(self);
168 }
169
170 marg = width - STRINGLIB_LEN(self);
171 left = marg / 2 + (marg & width & 1);
172
173 return pad(self, left, marg - left, fillchar);
174 }
175
176 static PyObject *
stringlib_zfill(PyObject * self,PyObject * args)177 stringlib_zfill(PyObject *self, PyObject *args)
178 {
179 Py_ssize_t fill;
180 PyObject *s;
181 char *p;
182 Py_ssize_t width;
183
184 if (!PyArg_ParseTuple(args, "n:zfill", &width))
185 return NULL;
186
187 if (STRINGLIB_LEN(self) >= width) {
188 return return_self(self);
189 }
190
191 fill = width - STRINGLIB_LEN(self);
192
193 s = pad(self, fill, 0, '0');
194
195 if (s == NULL)
196 return NULL;
197
198 p = STRINGLIB_STR(s);
199 if (p[fill] == '+' || p[fill] == '-') {
200 /* move sign to beginning of string */
201 p[0] = p[fill];
202 p[fill] = '0';
203 }
204
205 return s;
206 }
207
208
209 /* find and count characters and substrings */
210
/* Locate the first byte equal to `c` within the first `target_len` bytes of
   `target`.  Thin wrapper over memchr(): yields a `char *` to the match, or
   NULL when there is none.  The result is non-const even for a const input. */
#define findchar(target, target_len, c)                                   \
    ((char *)memchr((const void *)(target), (c), (size_t)(target_len)))
213
214
215 static Py_ssize_t
countchar(const char * target,Py_ssize_t target_len,char c,Py_ssize_t maxcount)216 countchar(const char *target, Py_ssize_t target_len, char c,
217 Py_ssize_t maxcount)
218 {
219 Py_ssize_t count = 0;
220 const char *start = target;
221 const char *end = target + target_len;
222
223 while ((start = findchar(start, end - start, c)) != NULL) {
224 count++;
225 if (count >= maxcount)
226 break;
227 start += 1;
228 }
229 return count;
230 }
231
232
233 /* Algorithms for different cases of string replacement */
234
235 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
236 static PyObject *
stringlib_replace_interleave(PyObject * self,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)237 stringlib_replace_interleave(PyObject *self,
238 const char *to_s, Py_ssize_t to_len,
239 Py_ssize_t maxcount)
240 {
241 const char *self_s;
242 char *result_s;
243 Py_ssize_t self_len, result_len;
244 Py_ssize_t count, i;
245 PyObject *result;
246
247 self_len = STRINGLIB_LEN(self);
248
249 /* 1 at the end plus 1 after every character;
250 count = min(maxcount, self_len + 1) */
251 if (maxcount <= self_len) {
252 count = maxcount;
253 }
254 else {
255 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
256 count = self_len + 1;
257 }
258
259 /* Check for overflow */
260 /* result_len = count * to_len + self_len; */
261 assert(count > 0);
262 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
263 PyErr_SetString(PyExc_OverflowError,
264 "replace bytes are too long");
265 return NULL;
266 }
267 result_len = count * to_len + self_len;
268 result = STRINGLIB_NEW(NULL, result_len);
269 if (result == NULL) {
270 return NULL;
271 }
272
273 self_s = STRINGLIB_STR(self);
274 result_s = STRINGLIB_STR(result);
275
276 if (to_len > 1) {
277 /* Lay the first one down (guaranteed this will occur) */
278 memcpy(result_s, to_s, to_len);
279 result_s += to_len;
280 count -= 1;
281
282 for (i = 0; i < count; i++) {
283 *result_s++ = *self_s++;
284 memcpy(result_s, to_s, to_len);
285 result_s += to_len;
286 }
287 }
288 else {
289 result_s[0] = to_s[0];
290 result_s += to_len;
291 count -= 1;
292 for (i = 0; i < count; i++) {
293 *result_s++ = *self_s++;
294 result_s[0] = to_s[0];
295 result_s += to_len;
296 }
297 }
298
299 /* Copy the rest of the original string */
300 memcpy(result_s, self_s, self_len - i);
301
302 return result;
303 }
304
305 /* Special case for deleting a single character */
306 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
307 static PyObject *
stringlib_replace_delete_single_character(PyObject * self,char from_c,Py_ssize_t maxcount)308 stringlib_replace_delete_single_character(PyObject *self,
309 char from_c, Py_ssize_t maxcount)
310 {
311 const char *self_s, *start, *next, *end;
312 char *result_s;
313 Py_ssize_t self_len, result_len;
314 Py_ssize_t count;
315 PyObject *result;
316
317 self_len = STRINGLIB_LEN(self);
318 self_s = STRINGLIB_STR(self);
319
320 count = countchar(self_s, self_len, from_c, maxcount);
321 if (count == 0) {
322 return return_self(self);
323 }
324
325 result_len = self_len - count; /* from_len == 1 */
326 assert(result_len>=0);
327
328 result = STRINGLIB_NEW(NULL, result_len);
329 if (result == NULL) {
330 return NULL;
331 }
332 result_s = STRINGLIB_STR(result);
333
334 start = self_s;
335 end = self_s + self_len;
336 while (count-- > 0) {
337 next = findchar(start, end - start, from_c);
338 if (next == NULL)
339 break;
340 memcpy(result_s, start, next - start);
341 result_s += (next - start);
342 start = next + 1;
343 }
344 memcpy(result_s, start, end - start);
345
346 return result;
347 }
348
349 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
350
351 static PyObject *
stringlib_replace_delete_substring(PyObject * self,const char * from_s,Py_ssize_t from_len,Py_ssize_t maxcount)352 stringlib_replace_delete_substring(PyObject *self,
353 const char *from_s, Py_ssize_t from_len,
354 Py_ssize_t maxcount)
355 {
356 const char *self_s, *start, *next, *end;
357 char *result_s;
358 Py_ssize_t self_len, result_len;
359 Py_ssize_t count, offset;
360 PyObject *result;
361
362 self_len = STRINGLIB_LEN(self);
363 self_s = STRINGLIB_STR(self);
364
365 count = stringlib_count(self_s, self_len,
366 from_s, from_len,
367 maxcount);
368
369 if (count == 0) {
370 /* no matches */
371 return return_self(self);
372 }
373
374 result_len = self_len - (count * from_len);
375 assert (result_len>=0);
376
377 result = STRINGLIB_NEW(NULL, result_len);
378 if (result == NULL) {
379 return NULL;
380 }
381 result_s = STRINGLIB_STR(result);
382
383 start = self_s;
384 end = self_s + self_len;
385 while (count-- > 0) {
386 offset = stringlib_find(start, end - start,
387 from_s, from_len,
388 0);
389 if (offset == -1)
390 break;
391 next = start + offset;
392
393 memcpy(result_s, start, next - start);
394
395 result_s += (next - start);
396 start = next + from_len;
397 }
398 memcpy(result_s, start, end - start);
399 return result;
400 }
401
402 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
403 static PyObject *
stringlib_replace_single_character_in_place(PyObject * self,char from_c,char to_c,Py_ssize_t maxcount)404 stringlib_replace_single_character_in_place(PyObject *self,
405 char from_c, char to_c,
406 Py_ssize_t maxcount)
407 {
408 const char *self_s, *end;
409 char *result_s, *start, *next;
410 Py_ssize_t self_len;
411 PyObject *result;
412
413 /* The result string will be the same size */
414 self_s = STRINGLIB_STR(self);
415 self_len = STRINGLIB_LEN(self);
416
417 next = findchar(self_s, self_len, from_c);
418
419 if (next == NULL) {
420 /* No matches; return the original bytes */
421 return return_self(self);
422 }
423
424 /* Need to make a new bytes */
425 result = STRINGLIB_NEW(NULL, self_len);
426 if (result == NULL) {
427 return NULL;
428 }
429 result_s = STRINGLIB_STR(result);
430 memcpy(result_s, self_s, self_len);
431
432 /* change everything in-place, starting with this one */
433 start = result_s + (next - self_s);
434 *start = to_c;
435 start++;
436 end = result_s + self_len;
437
438 while (--maxcount > 0) {
439 next = findchar(start, end - start, from_c);
440 if (next == NULL)
441 break;
442 *next = to_c;
443 start = next + 1;
444 }
445
446 return result;
447 }
448
449 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
450 static PyObject *
stringlib_replace_substring_in_place(PyObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)451 stringlib_replace_substring_in_place(PyObject *self,
452 const char *from_s, Py_ssize_t from_len,
453 const char *to_s, Py_ssize_t to_len,
454 Py_ssize_t maxcount)
455 {
456 const char *self_s, *end;
457 char *result_s, *start;
458 Py_ssize_t self_len, offset;
459 PyObject *result;
460
461 /* The result bytes will be the same size */
462
463 self_s = STRINGLIB_STR(self);
464 self_len = STRINGLIB_LEN(self);
465
466 offset = stringlib_find(self_s, self_len,
467 from_s, from_len,
468 0);
469 if (offset == -1) {
470 /* No matches; return the original bytes */
471 return return_self(self);
472 }
473
474 /* Need to make a new bytes */
475 result = STRINGLIB_NEW(NULL, self_len);
476 if (result == NULL) {
477 return NULL;
478 }
479 result_s = STRINGLIB_STR(result);
480 memcpy(result_s, self_s, self_len);
481
482 /* change everything in-place, starting with this one */
483 start = result_s + offset;
484 memcpy(start, to_s, from_len);
485 start += from_len;
486 end = result_s + self_len;
487
488 while ( --maxcount > 0) {
489 offset = stringlib_find(start, end - start,
490 from_s, from_len,
491 0);
492 if (offset == -1)
493 break;
494 memcpy(start + offset, to_s, from_len);
495 start += offset + from_len;
496 }
497
498 return result;
499 }
500
501 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
502 static PyObject *
stringlib_replace_single_character(PyObject * self,char from_c,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)503 stringlib_replace_single_character(PyObject *self,
504 char from_c,
505 const char *to_s, Py_ssize_t to_len,
506 Py_ssize_t maxcount)
507 {
508 const char *self_s, *start, *next, *end;
509 char *result_s;
510 Py_ssize_t self_len, result_len;
511 Py_ssize_t count;
512 PyObject *result;
513
514 self_s = STRINGLIB_STR(self);
515 self_len = STRINGLIB_LEN(self);
516
517 count = countchar(self_s, self_len, from_c, maxcount);
518 if (count == 0) {
519 /* no matches, return unchanged */
520 return return_self(self);
521 }
522
523 /* use the difference between current and new, hence the "-1" */
524 /* result_len = self_len + count * (to_len-1) */
525 assert(count > 0);
526 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
527 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
528 return NULL;
529 }
530 result_len = self_len + count * (to_len - 1);
531
532 result = STRINGLIB_NEW(NULL, result_len);
533 if (result == NULL) {
534 return NULL;
535 }
536 result_s = STRINGLIB_STR(result);
537
538 start = self_s;
539 end = self_s + self_len;
540 while (count-- > 0) {
541 next = findchar(start, end - start, from_c);
542 if (next == NULL)
543 break;
544
545 if (next == start) {
546 /* replace with the 'to' */
547 memcpy(result_s, to_s, to_len);
548 result_s += to_len;
549 start += 1;
550 } else {
551 /* copy the unchanged old then the 'to' */
552 memcpy(result_s, start, next - start);
553 result_s += (next - start);
554 memcpy(result_s, to_s, to_len);
555 result_s += to_len;
556 start = next + 1;
557 }
558 }
559 /* Copy the remainder of the remaining bytes */
560 memcpy(result_s, start, end - start);
561
562 return result;
563 }
564
565 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
566 static PyObject *
stringlib_replace_substring(PyObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)567 stringlib_replace_substring(PyObject *self,
568 const char *from_s, Py_ssize_t from_len,
569 const char *to_s, Py_ssize_t to_len,
570 Py_ssize_t maxcount)
571 {
572 const char *self_s, *start, *next, *end;
573 char *result_s;
574 Py_ssize_t self_len, result_len;
575 Py_ssize_t count, offset;
576 PyObject *result;
577
578 self_s = STRINGLIB_STR(self);
579 self_len = STRINGLIB_LEN(self);
580
581 count = stringlib_count(self_s, self_len,
582 from_s, from_len,
583 maxcount);
584
585 if (count == 0) {
586 /* no matches, return unchanged */
587 return return_self(self);
588 }
589
590 /* Check for overflow */
591 /* result_len = self_len + count * (to_len-from_len) */
592 assert(count > 0);
593 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
594 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
595 return NULL;
596 }
597 result_len = self_len + count * (to_len - from_len);
598
599 result = STRINGLIB_NEW(NULL, result_len);
600 if (result == NULL) {
601 return NULL;
602 }
603 result_s = STRINGLIB_STR(result);
604
605 start = self_s;
606 end = self_s + self_len;
607 while (count-- > 0) {
608 offset = stringlib_find(start, end - start,
609 from_s, from_len,
610 0);
611 if (offset == -1)
612 break;
613 next = start + offset;
614 if (next == start) {
615 /* replace with the 'to' */
616 memcpy(result_s, to_s, to_len);
617 result_s += to_len;
618 start += from_len;
619 } else {
620 /* copy the unchanged old then the 'to' */
621 memcpy(result_s, start, next - start);
622 result_s += (next - start);
623 memcpy(result_s, to_s, to_len);
624 result_s += to_len;
625 start = next + from_len;
626 }
627 }
628 /* Copy the remainder of the remaining bytes */
629 memcpy(result_s, start, end - start);
630
631 return result;
632 }
633
634
635 static PyObject *
stringlib_replace(PyObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)636 stringlib_replace(PyObject *self,
637 const char *from_s, Py_ssize_t from_len,
638 const char *to_s, Py_ssize_t to_len,
639 Py_ssize_t maxcount)
640 {
641 if (maxcount < 0) {
642 maxcount = PY_SSIZE_T_MAX;
643 } else if (maxcount == 0 || STRINGLIB_LEN(self) == 0) {
644 /* nothing to do; return the original bytes */
645 return return_self(self);
646 }
647
648 /* Handle zero-length special cases */
649 if (from_len == 0) {
650 if (to_len == 0) {
651 /* nothing to do; return the original bytes */
652 return return_self(self);
653 }
654 /* insert the 'to' bytes everywhere. */
655 /* >>> b"Python".replace(b"", b".") */
656 /* b'.P.y.t.h.o.n.' */
657 return stringlib_replace_interleave(self, to_s, to_len, maxcount);
658 }
659
660 /* Except for b"".replace(b"", b"A") == b"A" there is no way beyond this */
661 /* point for an empty self bytes to generate a non-empty bytes */
662 /* Special case so the remaining code always gets a non-empty bytes */
663 if (STRINGLIB_LEN(self) == 0) {
664 return return_self(self);
665 }
666
667 if (to_len == 0) {
668 /* delete all occurrences of 'from' bytes */
669 if (from_len == 1) {
670 return stringlib_replace_delete_single_character(
671 self, from_s[0], maxcount);
672 } else {
673 return stringlib_replace_delete_substring(
674 self, from_s, from_len, maxcount);
675 }
676 }
677
678 /* Handle special case where both bytes have the same length */
679
680 if (from_len == to_len) {
681 if (from_len == 1) {
682 return stringlib_replace_single_character_in_place(
683 self, from_s[0], to_s[0], maxcount);
684 } else {
685 return stringlib_replace_substring_in_place(
686 self, from_s, from_len, to_s, to_len, maxcount);
687 }
688 }
689
690 /* Otherwise use the more generic algorithms */
691 if (from_len == 1) {
692 return stringlib_replace_single_character(
693 self, from_s[0], to_s, to_len, maxcount);
694 } else {
695 /* len('from')>=2, len('to')>=1 */
696 return stringlib_replace_substring(
697 self, from_s, from_len, to_s, to_len, maxcount);
698 }
699 }
700
701 #undef findchar
702