1 /* String (str/bytes) object implementation */
2
3 #define PY_SSIZE_T_CLEAN
4
5 #include "Python.h"
6 #include <ctype.h>
7 #include <stddef.h>
8
9 #ifdef COUNT_ALLOCS
10 Py_ssize_t null_strings, one_strings;
11 #endif
12
13 static PyStringObject *characters[UCHAR_MAX + 1];
14 static PyStringObject *nullstring;
15
16 /* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
23 */
24 static PyObject *interned;
25
26 /* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyStringObject_SIZE + n bytes.
28
29 Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30 3 bytes per string allocation on a typical system.
31 */
32 #define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33
34 /*
35 For PyString_FromString(), the parameter `str' points to a null-terminated
36 string containing exactly `size' bytes.
37
38 For PyString_FromStringAndSize(), the parameter `str' is
39 either NULL or else points to a string containing at least `size' bytes.
40 For PyString_FromStringAndSize(), the string in the `str' parameter does
41 not have to be null-terminated. (Therefore it is safe to construct a
42 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
43 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
44 bytes (setting the last byte to the null terminating character) and you can
45 fill in the data yourself. If `str' is non-NULL then the resulting
46 PyString object must be treated as immutable and you must not fill in nor
47 alter the data yourself, since the strings may be shared.
48
49 The PyObject member `op->ob_size', which denotes the number of "extra
50 items" in a variable-size object, will contain the number of bytes
51 allocated for string data, not counting the null terminating character.
52 It is therefore equal to the `size' parameter (for
53 PyString_FromStringAndSize()) or the length of the string in the `str'
54 parameter (for PyString_FromString()).
55 */
56 PyObject *
PyString_FromStringAndSize(const char * str,Py_ssize_t size)57 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
58 {
59 register PyStringObject *op;
60 if (size < 0) {
61 PyErr_SetString(PyExc_SystemError,
62 "Negative size passed to PyString_FromStringAndSize");
63 return NULL;
64 }
65 if (size == 0 && (op = nullstring) != NULL) {
66 #ifdef COUNT_ALLOCS
67 null_strings++;
68 #endif
69 Py_INCREF(op);
70 return (PyObject *)op;
71 }
72 if (size == 1 && str != NULL &&
73 (op = characters[*str & UCHAR_MAX]) != NULL)
74 {
75 #ifdef COUNT_ALLOCS
76 one_strings++;
77 #endif
78 Py_INCREF(op);
79 return (PyObject *)op;
80 }
81
82 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
83 PyErr_SetString(PyExc_OverflowError, "string is too large");
84 return NULL;
85 }
86
87 /* Inline PyObject_NewVar */
88 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
89 if (op == NULL)
90 return PyErr_NoMemory();
91 (void)PyObject_INIT_VAR(op, &PyString_Type, size);
92 op->ob_shash = -1;
93 op->ob_sstate = SSTATE_NOT_INTERNED;
94 if (str != NULL)
95 Py_MEMCPY(op->ob_sval, str, size);
96 op->ob_sval[size] = '\0';
97 /* share short strings */
98 if (size == 0) {
99 PyObject *t = (PyObject *)op;
100 PyString_InternInPlace(&t);
101 op = (PyStringObject *)t;
102 nullstring = op;
103 Py_INCREF(op);
104 } else if (size == 1 && str != NULL) {
105 PyObject *t = (PyObject *)op;
106 PyString_InternInPlace(&t);
107 op = (PyStringObject *)t;
108 characters[*str & UCHAR_MAX] = op;
109 Py_INCREF(op);
110 }
111 return (PyObject *) op;
112 }
113
114 PyObject *
PyString_FromString(const char * str)115 PyString_FromString(const char *str)
116 {
117 register size_t size;
118 register PyStringObject *op;
119
120 assert(str != NULL);
121 size = strlen(str);
122 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
123 PyErr_SetString(PyExc_OverflowError,
124 "string is too long for a Python string");
125 return NULL;
126 }
127 if (size == 0 && (op = nullstring) != NULL) {
128 #ifdef COUNT_ALLOCS
129 null_strings++;
130 #endif
131 Py_INCREF(op);
132 return (PyObject *)op;
133 }
134 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
135 #ifdef COUNT_ALLOCS
136 one_strings++;
137 #endif
138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
141
142 /* Inline PyObject_NewVar */
143 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
144 if (op == NULL)
145 return PyErr_NoMemory();
146 (void)PyObject_INIT_VAR(op, &PyString_Type, size);
147 op->ob_shash = -1;
148 op->ob_sstate = SSTATE_NOT_INTERNED;
149 Py_MEMCPY(op->ob_sval, str, size+1);
150 /* share short strings */
151 if (size == 0) {
152 PyObject *t = (PyObject *)op;
153 PyString_InternInPlace(&t);
154 op = (PyStringObject *)t;
155 nullstring = op;
156 Py_INCREF(op);
157 } else if (size == 1) {
158 PyObject *t = (PyObject *)op;
159 PyString_InternInPlace(&t);
160 op = (PyStringObject *)t;
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
165 }
166
167 PyObject *
PyString_FromFormatV(const char * format,va_list vargs)168 PyString_FromFormatV(const char *format, va_list vargs)
169 {
170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
175
176 #ifdef VA_LIST_IS_ARRAY
177 Py_MEMCPY(count, vargs, sizeof(va_list));
178 #else
179 #ifdef __va_copy
180 __va_copy(count, vargs);
181 #else
182 count = vargs;
183 #endif
184 #endif
185 /* step 1: figure out how large a buffer we need */
186 for (f = format; *f; f++) {
187 if (*f == '%') {
188 #ifdef HAVE_LONG_LONG
189 int longlongflag = 0;
190 #endif
191 const char* p = f;
192 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
193 ;
194
195 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
196 * they don't affect the amount of space we reserve.
197 */
198 if (*f == 'l') {
199 if (f[1] == 'd' || f[1] == 'u') {
200 ++f;
201 }
202 #ifdef HAVE_LONG_LONG
203 else if (f[1] == 'l' &&
204 (f[2] == 'd' || f[2] == 'u')) {
205 longlongflag = 1;
206 f += 2;
207 }
208 #endif
209 }
210 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
211 ++f;
212 }
213
214 switch (*f) {
215 case 'c':
216 (void)va_arg(count, int);
217 /* fall through... */
218 case '%':
219 n++;
220 break;
221 case 'd': case 'u': case 'i': case 'x':
222 (void) va_arg(count, int);
223 #ifdef HAVE_LONG_LONG
224 /* Need at most
225 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
226 plus 1 for the sign. 53/22 is an upper
227 bound for log10(256). */
228 if (longlongflag)
229 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
230 else
231 #endif
232 /* 20 bytes is enough to hold a 64-bit
233 integer. Decimal takes the most
234 space. This isn't enough for
235 octal. */
236 n += 20;
237
238 break;
239 case 's':
240 s = va_arg(count, char*);
241 n += strlen(s);
242 break;
243 case 'p':
244 (void) va_arg(count, int);
245 /* maximum 64-bit pointer representation:
246 * 0xffffffffffffffff
247 * so 19 characters is enough.
248 * XXX I count 18 -- what's the extra for?
249 */
250 n += 19;
251 break;
252 default:
253 /* if we stumble upon an unknown
254 formatting code, copy the rest of
255 the format string to the output
256 string. (we cannot just skip the
257 code, since there's no way to know
258 what's in the argument list) */
259 n += strlen(p);
260 goto expand;
261 }
262 } else
263 n++;
264 }
265 expand:
266 /* step 2: fill the buffer */
267 /* Since we've analyzed how much space we need for the worst case,
268 use sprintf directly instead of the slower PyOS_snprintf. */
269 string = PyString_FromStringAndSize(NULL, n);
270 if (!string)
271 return NULL;
272
273 s = PyString_AsString(string);
274
275 for (f = format; *f; f++) {
276 if (*f == '%') {
277 const char* p = f++;
278 Py_ssize_t i;
279 int longflag = 0;
280 #ifdef HAVE_LONG_LONG
281 int longlongflag = 0;
282 #endif
283 int size_tflag = 0;
284 /* parse the width.precision part (we're only
285 interested in the precision value, if any) */
286 n = 0;
287 while (isdigit(Py_CHARMASK(*f)))
288 n = (n*10) + *f++ - '0';
289 if (*f == '.') {
290 f++;
291 n = 0;
292 while (isdigit(Py_CHARMASK(*f)))
293 n = (n*10) + *f++ - '0';
294 }
295 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
296 f++;
297 /* Handle %ld, %lu, %lld and %llu. */
298 if (*f == 'l') {
299 if (f[1] == 'd' || f[1] == 'u') {
300 longflag = 1;
301 ++f;
302 }
303 #ifdef HAVE_LONG_LONG
304 else if (f[1] == 'l' &&
305 (f[2] == 'd' || f[2] == 'u')) {
306 longlongflag = 1;
307 f += 2;
308 }
309 #endif
310 }
311 /* handle the size_t flag. */
312 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
313 size_tflag = 1;
314 ++f;
315 }
316
317 switch (*f) {
318 case 'c':
319 *s++ = va_arg(vargs, int);
320 break;
321 case 'd':
322 if (longflag)
323 sprintf(s, "%ld", va_arg(vargs, long));
324 #ifdef HAVE_LONG_LONG
325 else if (longlongflag)
326 sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
327 va_arg(vargs, PY_LONG_LONG));
328 #endif
329 else if (size_tflag)
330 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
331 va_arg(vargs, Py_ssize_t));
332 else
333 sprintf(s, "%d", va_arg(vargs, int));
334 s += strlen(s);
335 break;
336 case 'u':
337 if (longflag)
338 sprintf(s, "%lu",
339 va_arg(vargs, unsigned long));
340 #ifdef HAVE_LONG_LONG
341 else if (longlongflag)
342 sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
343 va_arg(vargs, PY_LONG_LONG));
344 #endif
345 else if (size_tflag)
346 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
347 va_arg(vargs, size_t));
348 else
349 sprintf(s, "%u",
350 va_arg(vargs, unsigned int));
351 s += strlen(s);
352 break;
353 case 'i':
354 sprintf(s, "%i", va_arg(vargs, int));
355 s += strlen(s);
356 break;
357 case 'x':
358 sprintf(s, "%x", va_arg(vargs, int));
359 s += strlen(s);
360 break;
361 case 's':
362 p = va_arg(vargs, char*);
363 i = strlen(p);
364 if (n > 0 && i > n)
365 i = n;
366 Py_MEMCPY(s, p, i);
367 s += i;
368 break;
369 case 'p':
370 sprintf(s, "%p", va_arg(vargs, void*));
371 /* %p is ill-defined: ensure leading 0x. */
372 if (s[1] == 'X')
373 s[1] = 'x';
374 else if (s[1] != 'x') {
375 memmove(s+2, s, strlen(s)+1);
376 s[0] = '0';
377 s[1] = 'x';
378 }
379 s += strlen(s);
380 break;
381 case '%':
382 *s++ = '%';
383 break;
384 default:
385 strcpy(s, p);
386 s += strlen(s);
387 goto end;
388 }
389 } else
390 *s++ = *f;
391 }
392
393 end:
394 if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
395 return NULL;
396 return string;
397 }
398
399 PyObject *
PyString_FromFormat(const char * format,...)400 PyString_FromFormat(const char *format, ...)
401 {
402 PyObject* ret;
403 va_list vargs;
404
405 #ifdef HAVE_STDARG_PROTOTYPES
406 va_start(vargs, format);
407 #else
408 va_start(vargs);
409 #endif
410 ret = PyString_FromFormatV(format, vargs);
411 va_end(vargs);
412 return ret;
413 }
414
415
PyString_Decode(const char * s,Py_ssize_t size,const char * encoding,const char * errors)416 PyObject *PyString_Decode(const char *s,
417 Py_ssize_t size,
418 const char *encoding,
419 const char *errors)
420 {
421 PyObject *v, *str;
422
423 str = PyString_FromStringAndSize(s, size);
424 if (str == NULL)
425 return NULL;
426 v = PyString_AsDecodedString(str, encoding, errors);
427 Py_DECREF(str);
428 return v;
429 }
430
/* Decode the string object `str' through the codec registry.

   str      -- must pass PyString_Check(), otherwise PyErr_BadArgument()
               is raised.
   encoding -- codec name; NULL selects the default encoding when Unicode
               support is compiled in, and raises ValueError otherwise.
   errors   -- passed through unchanged to the codec.

   Returns whatever _PyCodec_DecodeText() returns, or NULL on error. */
PyObject *PyString_AsDecodedObject(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    if (!PyString_Check(str)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (encoding == NULL) {
#ifdef Py_USING_UNICODE
        encoding = PyUnicode_GetDefaultEncoding();
#else
        PyErr_SetString(PyExc_ValueError, "no encoding specified");
        return NULL;
#endif
    }

    /* Decode via the codec registry; NULL propagates as the error. */
    return _PyCodec_DecodeText(str, encoding, errors);
}
461
/* Like PyString_AsDecodedObject(), but insists on a string result.

   A Unicode result (when Unicode support is compiled in) is converted
   with PyUnicode_AsEncodedString(v, NULL, NULL); any other non-string
   result raises TypeError.  Returns NULL on error. */
PyObject *PyString_AsDecodedString(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    PyObject *result = PyString_AsDecodedObject(str, encoding, errors);

    if (result == NULL)
        return NULL;

#ifdef Py_USING_UNICODE
    /* Convert Unicode to a string using the default encoding */
    if (PyUnicode_Check(result)) {
        PyObject *as_unicode = result;
        result = PyUnicode_AsEncodedString(as_unicode, NULL, NULL);
        Py_DECREF(as_unicode);
        if (result == NULL)
            return NULL;
    }
#endif
    if (PyString_Check(result))
        return result;

    PyErr_Format(PyExc_TypeError,
                 "decoder did not return a string object (type=%.400s)",
                 Py_TYPE(result)->tp_name);
    Py_DECREF(result);
    return NULL;
}
495
PyString_Encode(const char * s,Py_ssize_t size,const char * encoding,const char * errors)496 PyObject *PyString_Encode(const char *s,
497 Py_ssize_t size,
498 const char *encoding,
499 const char *errors)
500 {
501 PyObject *v, *str;
502
503 str = PyString_FromStringAndSize(s, size);
504 if (str == NULL)
505 return NULL;
506 v = PyString_AsEncodedString(str, encoding, errors);
507 Py_DECREF(str);
508 return v;
509 }
510
/* Encode the string object `str' through the codec registry.

   str      -- must pass PyString_Check(), otherwise PyErr_BadArgument()
               is raised.
   encoding -- codec name; NULL selects the default encoding when Unicode
               support is compiled in, and raises ValueError otherwise.
   errors   -- passed through unchanged to the codec.

   Returns whatever _PyCodec_EncodeText() returns, or NULL on error. */
PyObject *PyString_AsEncodedObject(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    if (!PyString_Check(str)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (encoding == NULL) {
#ifdef Py_USING_UNICODE
        encoding = PyUnicode_GetDefaultEncoding();
#else
        PyErr_SetString(PyExc_ValueError, "no encoding specified");
        return NULL;
#endif
    }

    /* Encode via the codec registry; NULL propagates as the error. */
    return _PyCodec_EncodeText(str, encoding, errors);
}
541
/* Like PyString_AsEncodedObject(), but insists on a string result.

   A Unicode result (when Unicode support is compiled in) is converted
   with PyUnicode_AsEncodedString(v, NULL, NULL); any other non-string
   result raises TypeError.  Returns NULL on error. */
PyObject *PyString_AsEncodedString(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    PyObject *result = PyString_AsEncodedObject(str, encoding, errors);

    if (result == NULL)
        return NULL;

#ifdef Py_USING_UNICODE
    /* Convert Unicode to a string using the default encoding */
    if (PyUnicode_Check(result)) {
        PyObject *as_unicode = result;
        result = PyUnicode_AsEncodedString(as_unicode, NULL, NULL);
        Py_DECREF(as_unicode);
        if (result == NULL)
            return NULL;
    }
#endif
    if (PyString_Check(result))
        return result;

    PyErr_Format(PyExc_TypeError,
                 "encoder did not return a string object (type=%.400s)",
                 Py_TYPE(result)->tp_name);
    Py_DECREF(result);
    return NULL;
}
575
576 static void
string_dealloc(PyObject * op)577 string_dealloc(PyObject *op)
578 {
579 switch (PyString_CHECK_INTERNED(op)) {
580 case SSTATE_NOT_INTERNED:
581 break;
582
583 case SSTATE_INTERNED_MORTAL:
584 /* revive dead object temporarily for DelItem */
585 Py_REFCNT(op) = 3;
586 if (PyDict_DelItem(interned, op) != 0)
587 Py_FatalError(
588 "deletion of interned string failed");
589 break;
590
591 case SSTATE_INTERNED_IMMORTAL:
592 Py_FatalError("Immortal interned string died.");
593
594 default:
595 Py_FatalError("Inconsistent interned string state.");
596 }
597 Py_TYPE(op)->tp_free(op);
598 }
599
600 /* Unescape a backslash-escaped string. If unicode is non-zero,
601 the string is a u-literal. If recode_encoding is non-zero,
602 the string is UTF-8 encoded and should be re-encoded in the
603 specified encoding. */
604
/* Decode the backslash escapes in the `len' bytes starting at `s' and
   return the result as a new string object (NULL with an exception set
   on failure).

   s, len          -- input bytes; `s' is advanced as input is consumed.
   errors          -- policy for a malformed \x escape: NULL or "strict"
                      raises ValueError, "replace" emits '?', "ignore"
                      emits nothing; any other value raises ValueError.
                      Also passed on to the UTF-8 decode / re-encode
                      calls when recoding.
   unicode         -- only consulted when Py_USING_UNICODE is not
                      defined: if non-zero, \u, \U and \N raise
                      ValueError.
   recode_encoding -- if non-NULL (and Unicode support is compiled in),
                      runs of bytes with the high bit set are decoded as
                      UTF-8 and re-encoded into this encoding. */
PyObject *PyString_DecodeEscape(const char *s,
                                Py_ssize_t len,
                                const char *errors,
                                Py_ssize_t unicode,
                                const char *recode_encoding)
{
    int c;
    char *p, *buf;
    const char *end;
    PyObject *v;
    Py_ssize_t newlen;
    /* Check for integer overflow */
    if (recode_encoding && (len > PY_SSIZE_T_MAX / 4)) {
        PyErr_SetString(PyExc_OverflowError, "string is too large");
        return NULL;
    }
    /* When recoding, reserve 4 output bytes per input byte; otherwise
       the output is never longer than the input. */
    newlen = recode_encoding ? 4*len:len;
    v = PyString_FromStringAndSize((char *)NULL, newlen);
    if (v == NULL)
        return NULL;
    /* p is the write cursor, buf remembers the start of the output. */
    p = buf = PyString_AsString(v);
    end = s + len;
    while (s < end) {
        if (*s != '\\') {
          /* Also entered from the default case of the switch below,
             after a literal backslash has been written. */
          non_esc:
#ifdef Py_USING_UNICODE
            if (recode_encoding && (*s & 0x80)) {
                PyObject *u, *w;
                char *r;
                const char* t;
                Py_ssize_t rn;
                t = s;
                /* Decode non-ASCII bytes as UTF-8. */
                while (t < end && (*t & 0x80)) t++;
                u = PyUnicode_DecodeUTF8(s, t - s, errors);
                if(!u) goto failed;

                /* Recode them in target encoding. */
                w = PyUnicode_AsEncodedString(
                    u, recode_encoding, errors);
                Py_DECREF(u);
                if (!w)                 goto failed;

                /* Append bytes to output buffer. */
                assert(PyString_Check(w));
                r = PyString_AS_STRING(w);
                rn = PyString_GET_SIZE(w);
                Py_MEMCPY(p, r, rn);
                p += rn;
                Py_DECREF(w);
                /* skip the whole run that was just recoded */
                s = t;
            } else {
                *p++ = *s++;
            }
#else
            *p++ = *s++;
#endif
            continue;
        }
        /* s points at a backslash: step over it and look at the
           escape character that follows. */
        s++;
        if (s==end) {
            PyErr_SetString(PyExc_ValueError,
                            "Trailing \\ in string");
            goto failed;
        }
        switch (*s++) {
        /* XXX This assumes ASCII! */
        case '\n': break;       /* backslash-newline produces no output */
        case '\\': *p++ = '\\'; break;
        case '\'': *p++ = '\''; break;
        case '\"': *p++ = '\"'; break;
        case 'b': *p++ = '\b'; break;
        case 'f': *p++ = '\014'; break; /* FF */
        case 't': *p++ = '\t'; break;
        case 'n': *p++ = '\n'; break;
        case 'r': *p++ = '\r'; break;
        case 'v': *p++ = '\013'; break; /* VT */
        case 'a': *p++ = '\007'; break; /* BEL, not classic C */
        case '0': case '1': case '2': case '3':
        case '4': case '5': case '6': case '7':
            /* Octal escape: one to three octal digits; s[-1] is the
               first digit, already consumed by the switch. */
            c = s[-1] - '0';
            if (s < end && '0' <= *s && *s <= '7') {
                c = (c<<3) + *s++ - '0';
                if (s < end && '0' <= *s && *s <= '7')
                    c = (c<<3) + *s++ - '0';
            }
            *p++ = c;
            break;
        case 'x':
            /* Hex escape: requires exactly two hex digits. */
            if (s+1 < end &&
                isxdigit(Py_CHARMASK(s[0])) &&
                isxdigit(Py_CHARMASK(s[1])))
            {
                unsigned int x = 0;
                c = Py_CHARMASK(*s);
                s++;
                if (isdigit(c))
                    x = c - '0';
                else if (islower(c))
                    x = 10 + c - 'a';
                else
                    x = 10 + c - 'A';
                x = x << 4;
                c = Py_CHARMASK(*s);
                s++;
                if (isdigit(c))
                    x += c - '0';
                else if (islower(c))
                    x += 10 + c - 'a';
                else
                    x += 10 + c - 'A';
                *p++ = x;
                break;
            }
            /* Malformed \x escape: apply the `errors' policy. */
            if (!errors || strcmp(errors, "strict") == 0) {
                PyErr_SetString(PyExc_ValueError,
                                "invalid \\x escape");
                goto failed;
            }
            if (strcmp(errors, "replace") == 0) {
                *p++ = '?';
            } else if (strcmp(errors, "ignore") == 0)
                /* do nothing */;
            else {
                PyErr_Format(PyExc_ValueError,
                             "decoding error; "
                             "unknown error handling code: %.400s",
                             errors);
                goto failed;
            }
            /* skip \x */
            if (s < end && isxdigit(Py_CHARMASK(s[0])))
                s++; /* and a hexdigit */
            break;
#ifndef Py_USING_UNICODE
        case 'u':
        case 'U':
        case 'N':
            if (unicode) {
                PyErr_SetString(PyExc_ValueError,
                          "Unicode escapes not legal "
                          "when Unicode disabled");
                goto failed;
            }
            /* otherwise falls into the default case below */
#endif
        default:
            /* Unrecognised escape: keep the backslash, un-consume the
               character after it and copy it as ordinary input. */
            *p++ = '\\';
            s--;
            goto non_esc; /* an arbitrary number of unescaped
                             UTF-8 bytes may follow. */
        }
    }
    /* Shrink the result to the number of bytes actually written. */
    if (p-buf < newlen)
        _PyString_Resize(&v, p - buf); /* v is cleared on error */
    return v;
  failed:
    Py_DECREF(v);
    return NULL;
}
764
765 /* -------------------------------------------------------------------- */
766 /* object api */
767
768 static Py_ssize_t
string_getsize(register PyObject * op)769 string_getsize(register PyObject *op)
770 {
771 char *s;
772 Py_ssize_t len;
773 if (PyString_AsStringAndSize(op, &s, &len))
774 return -1;
775 return len;
776 }
777
778 static /*const*/ char *
string_getbuffer(register PyObject * op)779 string_getbuffer(register PyObject *op)
780 {
781 char *s;
782 Py_ssize_t len;
783 if (PyString_AsStringAndSize(op, &s, &len))
784 return NULL;
785 return s;
786 }
787
788 Py_ssize_t
PyString_Size(register PyObject * op)789 PyString_Size(register PyObject *op)
790 {
791 if (!PyString_Check(op))
792 return string_getsize(op);
793 return Py_SIZE(op);
794 }
795
796 /*const*/ char *
PyString_AsString(register PyObject * op)797 PyString_AsString(register PyObject *op)
798 {
799 if (!PyString_Check(op))
800 return string_getbuffer(op);
801 return ((PyStringObject *)op) -> ob_sval;
802 }
803
804 int
PyString_AsStringAndSize(register PyObject * obj,register char ** s,register Py_ssize_t * len)805 PyString_AsStringAndSize(register PyObject *obj,
806 register char **s,
807 register Py_ssize_t *len)
808 {
809 if (s == NULL) {
810 PyErr_BadInternalCall();
811 return -1;
812 }
813
814 if (!PyString_Check(obj)) {
815 #ifdef Py_USING_UNICODE
816 if (PyUnicode_Check(obj)) {
817 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
818 if (obj == NULL)
819 return -1;
820 }
821 else
822 #endif
823 {
824 PyErr_Format(PyExc_TypeError,
825 "expected string or Unicode object, "
826 "%.200s found", Py_TYPE(obj)->tp_name);
827 return -1;
828 }
829 }
830
831 *s = PyString_AS_STRING(obj);
832 if (len != NULL)
833 *len = PyString_GET_SIZE(obj);
834 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
835 PyErr_SetString(PyExc_TypeError,
836 "expected string without null bytes");
837 return -1;
838 }
839 return 0;
840 }
841
842 /* -------------------------------------------------------------------- */
843 /* Methods */
844
845 #include "stringlib/stringdefs.h"
846 #include "stringlib/fastsearch.h"
847
848 #include "stringlib/count.h"
849 #include "stringlib/find.h"
850 #include "stringlib/partition.h"
851 #include "stringlib/split.h"
852
853 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
854 #include "stringlib/localeutil.h"
855
856
857
858 static int
string_print(PyStringObject * op,FILE * fp,int flags)859 string_print(PyStringObject *op, FILE *fp, int flags)
860 {
861 Py_ssize_t i, str_len;
862 char c;
863 int quote;
864
865 /* XXX Ought to check for interrupts when writing long strings */
866 if (! PyString_CheckExact(op)) {
867 int ret;
868 /* A str subclass may have its own __str__ method. */
869 op = (PyStringObject *) PyObject_Str((PyObject *)op);
870 if (op == NULL)
871 return -1;
872 ret = string_print(op, fp, flags);
873 Py_DECREF(op);
874 return ret;
875 }
876 if (flags & Py_PRINT_RAW) {
877 char *data = op->ob_sval;
878 Py_ssize_t size = Py_SIZE(op);
879 Py_BEGIN_ALLOW_THREADS
880 while (size > INT_MAX) {
881 /* Very long strings cannot be written atomically.
882 * But don't write exactly INT_MAX bytes at a time
883 * to avoid memory aligment issues.
884 */
885 const int chunk_size = INT_MAX & ~0x3FFF;
886 fwrite(data, 1, chunk_size, fp);
887 data += chunk_size;
888 size -= chunk_size;
889 }
890 #ifdef __VMS
891 if (size) fwrite(data, (size_t)size, 1, fp);
892 #else
893 fwrite(data, 1, (size_t)size, fp);
894 #endif
895 Py_END_ALLOW_THREADS
896 return 0;
897 }
898
899 /* figure out which quote to use; single is preferred */
900 quote = '\'';
901 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
902 !memchr(op->ob_sval, '"', Py_SIZE(op)))
903 quote = '"';
904
905 str_len = Py_SIZE(op);
906 Py_BEGIN_ALLOW_THREADS
907 fputc(quote, fp);
908 for (i = 0; i < str_len; i++) {
909 /* Since strings are immutable and the caller should have a
910 reference, accessing the internal buffer should not be an issue
911 with the GIL released. */
912 c = op->ob_sval[i];
913 if (c == quote || c == '\\')
914 fprintf(fp, "\\%c", c);
915 else if (c == '\t')
916 fprintf(fp, "\\t");
917 else if (c == '\n')
918 fprintf(fp, "\\n");
919 else if (c == '\r')
920 fprintf(fp, "\\r");
921 else if (c < ' ' || c >= 0x7f)
922 fprintf(fp, "\\x%02x", c & 0xff);
923 else
924 fputc(c, fp);
925 }
926 fputc(quote, fp);
927 Py_END_ALLOW_THREADS
928 return 0;
929 }
930
931 PyObject *
PyString_Repr(PyObject * obj,int smartquotes)932 PyString_Repr(PyObject *obj, int smartquotes)
933 {
934 register PyStringObject* op = (PyStringObject*) obj;
935 size_t newsize;
936 PyObject *v;
937 if (Py_SIZE(op) > (PY_SSIZE_T_MAX - 2)/4) {
938 PyErr_SetString(PyExc_OverflowError,
939 "string is too large to make repr");
940 return NULL;
941 }
942 newsize = 2 + 4*Py_SIZE(op);
943 v = PyString_FromStringAndSize((char *)NULL, newsize);
944 if (v == NULL) {
945 return NULL;
946 }
947 else {
948 register Py_ssize_t i;
949 register char c;
950 register char *p;
951 int quote;
952
953 /* figure out which quote to use; single is preferred */
954 quote = '\'';
955 if (smartquotes &&
956 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
957 !memchr(op->ob_sval, '"', Py_SIZE(op)))
958 quote = '"';
959
960 p = PyString_AS_STRING(v);
961 *p++ = quote;
962 for (i = 0; i < Py_SIZE(op); i++) {
963 /* There's at least enough room for a hex escape
964 and a closing quote. */
965 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
966 c = op->ob_sval[i];
967 if (c == quote || c == '\\')
968 *p++ = '\\', *p++ = c;
969 else if (c == '\t')
970 *p++ = '\\', *p++ = 't';
971 else if (c == '\n')
972 *p++ = '\\', *p++ = 'n';
973 else if (c == '\r')
974 *p++ = '\\', *p++ = 'r';
975 else if (c < ' ' || c >= 0x7f) {
976 /* For performance, we don't want to call
977 PyOS_snprintf here (extra layers of
978 function call). */
979 sprintf(p, "\\x%02x", c & 0xff);
980 p += 4;
981 }
982 else
983 *p++ = c;
984 }
985 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
986 *p++ = quote;
987 *p = '\0';
988 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
989 return NULL;
990 return v;
991 }
992 }
993
994 static PyObject *
string_repr(PyObject * op)995 string_repr(PyObject *op)
996 {
997 return PyString_Repr(op, 1);
998 }
999
1000 static PyObject *
string_str(PyObject * s)1001 string_str(PyObject *s)
1002 {
1003 assert(PyString_Check(s));
1004 if (PyString_CheckExact(s)) {
1005 Py_INCREF(s);
1006 return s;
1007 }
1008 else {
1009 /* Subtype -- return genuine string with the same value. */
1010 PyStringObject *t = (PyStringObject *) s;
1011 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1012 }
1013 }
1014
1015 static Py_ssize_t
string_length(PyStringObject * a)1016 string_length(PyStringObject *a)
1017 {
1018 return Py_SIZE(a);
1019 }
1020
1021 static PyObject *
string_concat(register PyStringObject * a,register PyObject * bb)1022 string_concat(register PyStringObject *a, register PyObject *bb)
1023 {
1024 register Py_ssize_t size;
1025 register PyStringObject *op;
1026 if (!PyString_Check(bb)) {
1027 #ifdef Py_USING_UNICODE
1028 if (PyUnicode_Check(bb))
1029 return PyUnicode_Concat((PyObject *)a, bb);
1030 #endif
1031 if (PyByteArray_Check(bb))
1032 return PyByteArray_Concat((PyObject *)a, bb);
1033 PyErr_Format(PyExc_TypeError,
1034 "cannot concatenate 'str' and '%.200s' objects",
1035 Py_TYPE(bb)->tp_name);
1036 return NULL;
1037 }
1038 #define b ((PyStringObject *)bb)
1039 /* Optimize cases with empty left or right operand */
1040 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1041 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1042 if (Py_SIZE(a) == 0) {
1043 Py_INCREF(bb);
1044 return bb;
1045 }
1046 Py_INCREF(a);
1047 return (PyObject *)a;
1048 }
1049 /* Check that string sizes are not negative, to prevent an
1050 overflow in cases where we are passed incorrectly-created
1051 strings with negative lengths (due to a bug in other code).
1052 */
1053 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1054 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1055 PyErr_SetString(PyExc_OverflowError,
1056 "strings are too large to concat");
1057 return NULL;
1058 }
1059 size = Py_SIZE(a) + Py_SIZE(b);
1060
1061 /* Inline PyObject_NewVar */
1062 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1063 PyErr_SetString(PyExc_OverflowError,
1064 "strings are too large to concat");
1065 return NULL;
1066 }
1067 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1068 if (op == NULL)
1069 return PyErr_NoMemory();
1070 (void)PyObject_INIT_VAR(op, &PyString_Type, size);
1071 op->ob_shash = -1;
1072 op->ob_sstate = SSTATE_NOT_INTERNED;
1073 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1074 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1075 op->ob_sval[size] = '\0';
1076 return (PyObject *) op;
1077 #undef b
1078 }
1079
1080 static PyObject *
string_repeat(register PyStringObject * a,register Py_ssize_t n)1081 string_repeat(register PyStringObject *a, register Py_ssize_t n)
1082 {
1083 register Py_ssize_t i;
1084 register Py_ssize_t j;
1085 register Py_ssize_t size;
1086 register PyStringObject *op;
1087 size_t nbytes;
1088 if (n < 0)
1089 n = 0;
1090 /* watch out for overflows: the size can overflow Py_ssize_t,
1091 * and the # of bytes needed can overflow size_t
1092 */
1093 if (n && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1094 PyErr_SetString(PyExc_OverflowError,
1095 "repeated string is too long");
1096 return NULL;
1097 }
1098 size = Py_SIZE(a) * n;
1099 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1100 Py_INCREF(a);
1101 return (PyObject *)a;
1102 }
1103 nbytes = (size_t)size;
1104 if (nbytes + PyStringObject_SIZE <= nbytes) {
1105 PyErr_SetString(PyExc_OverflowError,
1106 "repeated string is too long");
1107 return NULL;
1108 }
1109 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1110 if (op == NULL)
1111 return PyErr_NoMemory();
1112 (void)PyObject_INIT_VAR(op, &PyString_Type, size);
1113 op->ob_shash = -1;
1114 op->ob_sstate = SSTATE_NOT_INTERNED;
1115 op->ob_sval[size] = '\0';
1116 if (Py_SIZE(a) == 1 && n > 0) {
1117 memset(op->ob_sval, a->ob_sval[0] , n);
1118 return (PyObject *) op;
1119 }
1120 i = 0;
1121 if (i < size) {
1122 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1123 i = Py_SIZE(a);
1124 }
1125 while (i < size) {
1126 j = (i <= size-i) ? i : size-i;
1127 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1128 i += j;
1129 }
1130 return (PyObject *) op;
1131 }
1132
1133 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1134
1135 static PyObject *
string_slice(register PyStringObject * a,register Py_ssize_t i,register Py_ssize_t j)1136 string_slice(register PyStringObject *a, register Py_ssize_t i,
1137 register Py_ssize_t j)
1138 /* j -- may be negative! */
1139 {
1140 if (i < 0)
1141 i = 0;
1142 if (j < 0)
1143 j = 0; /* Avoid signed/unsigned bug in next line */
1144 if (j > Py_SIZE(a))
1145 j = Py_SIZE(a);
1146 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1147 /* It's the same as a */
1148 Py_INCREF(a);
1149 return (PyObject *)a;
1150 }
1151 if (j < i)
1152 j = i;
1153 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1154 }
1155
1156 static int
string_contains(PyObject * str_obj,PyObject * sub_obj)1157 string_contains(PyObject *str_obj, PyObject *sub_obj)
1158 {
1159 if (!PyString_CheckExact(sub_obj)) {
1160 #ifdef Py_USING_UNICODE
1161 if (PyUnicode_Check(sub_obj))
1162 return PyUnicode_Contains(str_obj, sub_obj);
1163 #endif
1164 if (!PyString_Check(sub_obj)) {
1165 PyErr_Format(PyExc_TypeError,
1166 "'in <string>' requires string as left operand, "
1167 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1168 return -1;
1169 }
1170 }
1171
1172 return stringlib_contains_obj(str_obj, sub_obj);
1173 }
1174
1175 static PyObject *
string_item(PyStringObject * a,register Py_ssize_t i)1176 string_item(PyStringObject *a, register Py_ssize_t i)
1177 {
1178 char pchar;
1179 PyObject *v;
1180 if (i < 0 || i >= Py_SIZE(a)) {
1181 PyErr_SetString(PyExc_IndexError, "string index out of range");
1182 return NULL;
1183 }
1184 pchar = a->ob_sval[i];
1185 v = (PyObject *)characters[pchar & UCHAR_MAX];
1186 if (v == NULL)
1187 v = PyString_FromStringAndSize(&pchar, 1);
1188 else {
1189 #ifdef COUNT_ALLOCS
1190 one_strings++;
1191 #endif
1192 Py_INCREF(v);
1193 }
1194 return v;
1195 }
1196
1197 static PyObject*
string_richcompare(PyStringObject * a,PyStringObject * b,int op)1198 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1199 {
1200 int c;
1201 Py_ssize_t len_a, len_b;
1202 Py_ssize_t min_len;
1203 PyObject *result;
1204
1205 /* Make sure both arguments are strings. */
1206 if (!(PyString_Check(a) && PyString_Check(b))) {
1207 result = Py_NotImplemented;
1208 goto out;
1209 }
1210 if (a == b) {
1211 switch (op) {
1212 case Py_EQ:case Py_LE:case Py_GE:
1213 result = Py_True;
1214 goto out;
1215 case Py_NE:case Py_LT:case Py_GT:
1216 result = Py_False;
1217 goto out;
1218 }
1219 }
1220 if (op == Py_EQ) {
1221 /* Supporting Py_NE here as well does not save
1222 much time, since Py_NE is rarely used. */
1223 if (Py_SIZE(a) == Py_SIZE(b)
1224 && (a->ob_sval[0] == b->ob_sval[0]
1225 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1226 result = Py_True;
1227 } else {
1228 result = Py_False;
1229 }
1230 goto out;
1231 }
1232 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1233 min_len = (len_a < len_b) ? len_a : len_b;
1234 if (min_len > 0) {
1235 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1236 if (c==0)
1237 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1238 } else
1239 c = 0;
1240 if (c == 0)
1241 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1242 switch (op) {
1243 case Py_LT: c = c < 0; break;
1244 case Py_LE: c = c <= 0; break;
1245 case Py_EQ: assert(0); break; /* unreachable */
1246 case Py_NE: c = c != 0; break;
1247 case Py_GT: c = c > 0; break;
1248 case Py_GE: c = c >= 0; break;
1249 default:
1250 result = Py_NotImplemented;
1251 goto out;
1252 }
1253 result = c ? Py_True : Py_False;
1254 out:
1255 Py_INCREF(result);
1256 return result;
1257 }
1258
1259 int
_PyString_Eq(PyObject * o1,PyObject * o2)1260 _PyString_Eq(PyObject *o1, PyObject *o2)
1261 {
1262 PyStringObject *a = (PyStringObject*) o1;
1263 PyStringObject *b = (PyStringObject*) o2;
1264 return Py_SIZE(a) == Py_SIZE(b)
1265 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1266 }
1267
1268 static long
string_hash(PyStringObject * a)1269 string_hash(PyStringObject *a)
1270 {
1271 register Py_ssize_t len;
1272 register unsigned char *p;
1273 register long x;
1274
1275 #ifdef Py_DEBUG
1276 assert(_Py_HashSecret_Initialized);
1277 #endif
1278 if (a->ob_shash != -1)
1279 return a->ob_shash;
1280 len = Py_SIZE(a);
1281 /*
1282 We make the hash of the empty string be 0, rather than using
1283 (prefix ^ suffix), since this slightly obfuscates the hash secret
1284 */
1285 if (len == 0) {
1286 a->ob_shash = 0;
1287 return 0;
1288 }
1289 p = (unsigned char *) a->ob_sval;
1290 x = _Py_HashSecret.prefix;
1291 x ^= *p << 7;
1292 while (--len >= 0)
1293 x = (1000003*x) ^ *p++;
1294 x ^= Py_SIZE(a);
1295 x ^= _Py_HashSecret.suffix;
1296 if (x == -1)
1297 x = -2;
1298 a->ob_shash = x;
1299 return x;
1300 }
1301
1302 static PyObject*
string_subscript(PyStringObject * self,PyObject * item)1303 string_subscript(PyStringObject* self, PyObject* item)
1304 {
1305 if (PyIndex_Check(item)) {
1306 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1307 if (i == -1 && PyErr_Occurred())
1308 return NULL;
1309 if (i < 0)
1310 i += PyString_GET_SIZE(self);
1311 return string_item(self, i);
1312 }
1313 else if (PySlice_Check(item)) {
1314 Py_ssize_t start, stop, step, slicelength, cur, i;
1315 char* source_buf;
1316 char* result_buf;
1317 PyObject* result;
1318
1319 if (_PySlice_Unpack(item, &start, &stop, &step) < 0) {
1320 return NULL;
1321 }
1322 slicelength = _PySlice_AdjustIndices(PyString_GET_SIZE(self), &start,
1323 &stop, step);
1324
1325 if (slicelength <= 0) {
1326 return PyString_FromStringAndSize("", 0);
1327 }
1328 else if (start == 0 && step == 1 &&
1329 slicelength == PyString_GET_SIZE(self) &&
1330 PyString_CheckExact(self)) {
1331 Py_INCREF(self);
1332 return (PyObject *)self;
1333 }
1334 else if (step == 1) {
1335 return PyString_FromStringAndSize(
1336 PyString_AS_STRING(self) + start,
1337 slicelength);
1338 }
1339 else {
1340 source_buf = PyString_AsString((PyObject*)self);
1341 result_buf = (char *)PyMem_Malloc(slicelength);
1342 if (result_buf == NULL)
1343 return PyErr_NoMemory();
1344
1345 for (cur = start, i = 0; i < slicelength;
1346 cur += step, i++) {
1347 result_buf[i] = source_buf[cur];
1348 }
1349
1350 result = PyString_FromStringAndSize(result_buf,
1351 slicelength);
1352 PyMem_Free(result_buf);
1353 return result;
1354 }
1355 }
1356 else {
1357 PyErr_Format(PyExc_TypeError,
1358 "string indices must be integers, not %.200s",
1359 Py_TYPE(item)->tp_name);
1360 return NULL;
1361 }
1362 }
1363
1364 static Py_ssize_t
string_buffer_getreadbuf(PyStringObject * self,Py_ssize_t index,const void ** ptr)1365 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1366 {
1367 if ( index != 0 ) {
1368 PyErr_SetString(PyExc_SystemError,
1369 "accessing non-existent string segment");
1370 return -1;
1371 }
1372 *ptr = (void *)self->ob_sval;
1373 return Py_SIZE(self);
1374 }
1375
1376 static Py_ssize_t
string_buffer_getwritebuf(PyStringObject * self,Py_ssize_t index,const void ** ptr)1377 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1378 {
1379 PyErr_SetString(PyExc_TypeError,
1380 "Cannot use string as modifiable buffer");
1381 return -1;
1382 }
1383
1384 static Py_ssize_t
string_buffer_getsegcount(PyStringObject * self,Py_ssize_t * lenp)1385 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1386 {
1387 if ( lenp )
1388 *lenp = Py_SIZE(self);
1389 return 1;
1390 }
1391
1392 static Py_ssize_t
string_buffer_getcharbuf(PyStringObject * self,Py_ssize_t index,const char ** ptr)1393 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1394 {
1395 if ( index != 0 ) {
1396 PyErr_SetString(PyExc_SystemError,
1397 "accessing non-existent string segment");
1398 return -1;
1399 }
1400 *ptr = self->ob_sval;
1401 return Py_SIZE(self);
1402 }
1403
1404 static int
string_buffer_getbuffer(PyStringObject * self,Py_buffer * view,int flags)1405 string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1406 {
1407 return PyBuffer_FillInfo(view, (PyObject*)self,
1408 (void *)self->ob_sval, Py_SIZE(self),
1409 1, flags);
1410 }
1411
1412 static PySequenceMethods string_as_sequence = {
1413 (lenfunc)string_length, /*sq_length*/
1414 (binaryfunc)string_concat, /*sq_concat*/
1415 (ssizeargfunc)string_repeat, /*sq_repeat*/
1416 (ssizeargfunc)string_item, /*sq_item*/
1417 (ssizessizeargfunc)string_slice, /*sq_slice*/
1418 0, /*sq_ass_item*/
1419 0, /*sq_ass_slice*/
1420 (objobjproc)string_contains /*sq_contains*/
1421 };
1422
1423 static PyMappingMethods string_as_mapping = {
1424 (lenfunc)string_length,
1425 (binaryfunc)string_subscript,
1426 0,
1427 };
1428
1429 static PyBufferProcs string_as_buffer = {
1430 (readbufferproc)string_buffer_getreadbuf,
1431 (writebufferproc)string_buffer_getwritebuf,
1432 (segcountproc)string_buffer_getsegcount,
1433 (charbufferproc)string_buffer_getcharbuf,
1434 (getbufferproc)string_buffer_getbuffer,
1435 0, /* XXX */
1436 };
1437
1438
1439
/* Strip modes; the values double as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by the strip modes above. */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for a strip mode: the format string minus its
   three-character "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i] + 3)
1448
1449 PyDoc_STRVAR(split__doc__,
1450 "S.split([sep [,maxsplit]]) -> list of strings\n\
1451 \n\
1452 Return a list of the words in the string S, using sep as the\n\
1453 delimiter string. If maxsplit is given, at most maxsplit\n\
1454 splits are done. If sep is not specified or is None, any\n\
1455 whitespace string is a separator and empty strings are removed\n\
1456 from the result.");
1457
1458 static PyObject *
string_split(PyStringObject * self,PyObject * args)1459 string_split(PyStringObject *self, PyObject *args)
1460 {
1461 Py_ssize_t len = PyString_GET_SIZE(self), n;
1462 Py_ssize_t maxsplit = -1;
1463 const char *s = PyString_AS_STRING(self), *sub;
1464 PyObject *subobj = Py_None;
1465
1466 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1467 return NULL;
1468 if (maxsplit < 0)
1469 maxsplit = PY_SSIZE_T_MAX;
1470 if (subobj == Py_None)
1471 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1472 if (PyString_Check(subobj)) {
1473 sub = PyString_AS_STRING(subobj);
1474 n = PyString_GET_SIZE(subobj);
1475 }
1476 #ifdef Py_USING_UNICODE
1477 else if (PyUnicode_Check(subobj))
1478 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1479 #endif
1480 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1481 return NULL;
1482
1483 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1484 }
1485
1486 PyDoc_STRVAR(partition__doc__,
1487 "S.partition(sep) -> (head, sep, tail)\n\
1488 \n\
1489 Search for the separator sep in S, and return the part before it,\n\
1490 the separator itself, and the part after it. If the separator is not\n\
1491 found, return S and two empty strings.");
1492
1493 static PyObject *
string_partition(PyStringObject * self,PyObject * sep_obj)1494 string_partition(PyStringObject *self, PyObject *sep_obj)
1495 {
1496 const char *sep;
1497 Py_ssize_t sep_len;
1498
1499 if (PyString_Check(sep_obj)) {
1500 sep = PyString_AS_STRING(sep_obj);
1501 sep_len = PyString_GET_SIZE(sep_obj);
1502 }
1503 #ifdef Py_USING_UNICODE
1504 else if (PyUnicode_Check(sep_obj))
1505 return PyUnicode_Partition((PyObject *) self, sep_obj);
1506 #endif
1507 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1508 return NULL;
1509
1510 return stringlib_partition(
1511 (PyObject*) self,
1512 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1513 sep_obj, sep, sep_len
1514 );
1515 }
1516
1517 PyDoc_STRVAR(rpartition__doc__,
1518 "S.rpartition(sep) -> (head, sep, tail)\n\
1519 \n\
1520 Search for the separator sep in S, starting at the end of S, and return\n\
1521 the part before it, the separator itself, and the part after it. If the\n\
1522 separator is not found, return two empty strings and S.");
1523
1524 static PyObject *
string_rpartition(PyStringObject * self,PyObject * sep_obj)1525 string_rpartition(PyStringObject *self, PyObject *sep_obj)
1526 {
1527 const char *sep;
1528 Py_ssize_t sep_len;
1529
1530 if (PyString_Check(sep_obj)) {
1531 sep = PyString_AS_STRING(sep_obj);
1532 sep_len = PyString_GET_SIZE(sep_obj);
1533 }
1534 #ifdef Py_USING_UNICODE
1535 else if (PyUnicode_Check(sep_obj))
1536 return PyUnicode_RPartition((PyObject *) self, sep_obj);
1537 #endif
1538 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1539 return NULL;
1540
1541 return stringlib_rpartition(
1542 (PyObject*) self,
1543 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1544 sep_obj, sep, sep_len
1545 );
1546 }
1547
1548 PyDoc_STRVAR(rsplit__doc__,
1549 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1550 \n\
1551 Return a list of the words in the string S, using sep as the\n\
1552 delimiter string, starting at the end of the string and working\n\
1553 to the front. If maxsplit is given, at most maxsplit splits are\n\
1554 done. If sep is not specified or is None, any whitespace string\n\
1555 is a separator.");
1556
1557 static PyObject *
string_rsplit(PyStringObject * self,PyObject * args)1558 string_rsplit(PyStringObject *self, PyObject *args)
1559 {
1560 Py_ssize_t len = PyString_GET_SIZE(self), n;
1561 Py_ssize_t maxsplit = -1;
1562 const char *s = PyString_AS_STRING(self), *sub;
1563 PyObject *subobj = Py_None;
1564
1565 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1566 return NULL;
1567 if (maxsplit < 0)
1568 maxsplit = PY_SSIZE_T_MAX;
1569 if (subobj == Py_None)
1570 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1571 if (PyString_Check(subobj)) {
1572 sub = PyString_AS_STRING(subobj);
1573 n = PyString_GET_SIZE(subobj);
1574 }
1575 #ifdef Py_USING_UNICODE
1576 else if (PyUnicode_Check(subobj))
1577 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1578 #endif
1579 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1580 return NULL;
1581
1582 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1583 }
1584
1585
1586 PyDoc_STRVAR(join__doc__,
1587 "S.join(iterable) -> string\n\
1588 \n\
1589 Return a string which is the concatenation of the strings in the\n\
1590 iterable. The separator between elements is S.");
1591
1592 static PyObject *
string_join(PyStringObject * self,PyObject * orig)1593 string_join(PyStringObject *self, PyObject *orig)
1594 {
1595 char *sep = PyString_AS_STRING(self);
1596 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1597 PyObject *res = NULL;
1598 char *p;
1599 Py_ssize_t seqlen = 0;
1600 size_t sz = 0;
1601 Py_ssize_t i;
1602 PyObject *seq, *item;
1603
1604 seq = PySequence_Fast(orig, "can only join an iterable");
1605 if (seq == NULL) {
1606 return NULL;
1607 }
1608
1609 seqlen = PySequence_Size(seq);
1610 if (seqlen == 0) {
1611 Py_DECREF(seq);
1612 return PyString_FromString("");
1613 }
1614 if (seqlen == 1) {
1615 item = PySequence_Fast_GET_ITEM(seq, 0);
1616 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1617 Py_INCREF(item);
1618 Py_DECREF(seq);
1619 return item;
1620 }
1621 }
1622
1623 /* There are at least two things to join, or else we have a subclass
1624 * of the builtin types in the sequence.
1625 * Do a pre-pass to figure out the total amount of space we'll
1626 * need (sz), see whether any argument is absurd, and defer to
1627 * the Unicode join if appropriate.
1628 */
1629 for (i = 0; i < seqlen; i++) {
1630 const size_t old_sz = sz;
1631 item = PySequence_Fast_GET_ITEM(seq, i);
1632 if (!PyString_Check(item)){
1633 #ifdef Py_USING_UNICODE
1634 if (PyUnicode_Check(item)) {
1635 /* Defer to Unicode join.
1636 * CAUTION: There's no guarantee that the
1637 * original sequence can be iterated over
1638 * again, so we must pass seq here.
1639 */
1640 PyObject *result;
1641 result = PyUnicode_Join((PyObject *)self, seq);
1642 Py_DECREF(seq);
1643 return result;
1644 }
1645 #endif
1646 PyErr_Format(PyExc_TypeError,
1647 "sequence item %zd: expected string,"
1648 " %.80s found",
1649 i, Py_TYPE(item)->tp_name);
1650 Py_DECREF(seq);
1651 return NULL;
1652 }
1653 sz += PyString_GET_SIZE(item);
1654 if (i != 0)
1655 sz += seplen;
1656 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1657 PyErr_SetString(PyExc_OverflowError,
1658 "join() result is too long for a Python string");
1659 Py_DECREF(seq);
1660 return NULL;
1661 }
1662 }
1663
1664 /* Allocate result space. */
1665 res = PyString_FromStringAndSize((char*)NULL, sz);
1666 if (res == NULL) {
1667 Py_DECREF(seq);
1668 return NULL;
1669 }
1670
1671 /* Catenate everything. */
1672 p = PyString_AS_STRING(res);
1673 for (i = 0; i < seqlen; ++i) {
1674 size_t n;
1675 item = PySequence_Fast_GET_ITEM(seq, i);
1676 n = PyString_GET_SIZE(item);
1677 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1678 p += n;
1679 if (i < seqlen - 1) {
1680 Py_MEMCPY(p, sep, seplen);
1681 p += seplen;
1682 }
1683 }
1684
1685 Py_DECREF(seq);
1686 return res;
1687 }
1688
1689 PyObject *
_PyString_Join(PyObject * sep,PyObject * x)1690 _PyString_Join(PyObject *sep, PyObject *x)
1691 {
1692 assert(sep != NULL && PyString_Check(sep));
1693 assert(x != NULL);
1694 return string_join((PyStringObject *)sep, x);
1695 }
1696
/* Helper macro to fix up start/end slice values in place.
 *
 *   end   > len  -> end = len
 *   end   < 0    -> end += len, then floored at 0
 *   start < 0    -> start += len, then floored at 0
 *
 * Note that start is never capped at len, so start may still exceed end
 * (or len) afterwards.
 *
 * The expansion is a bare if / else-if followed by a second if; it is
 * not wrapped in do { } while (0).  Use it only as a complete statement,
 * never as the unbraced body of an if/else.  start and end must be
 * modifiable lvalues, and every argument is expanded more than once, so
 * none of them should have side effects.
 */
#define ADJUST_INDICES(start, end, len)         \
    if (end > len)                              \
        end = len;                              \
    else if (end < 0) {                         \
        end += len;                             \
        if (end < 0)                            \
            end = 0;                            \
    }                                           \
    if (start < 0) {                            \
        start += len;                           \
        if (start < 0)                          \
            start = 0;                          \
    }
1711
1712 Py_LOCAL_INLINE(Py_ssize_t)
string_find_internal(PyStringObject * self,PyObject * args,int dir)1713 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1714 {
1715 PyObject *subobj;
1716 const char *sub;
1717 Py_ssize_t sub_len;
1718 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1719
1720 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1721 args, &subobj, &start, &end))
1722 return -2;
1723
1724 if (PyString_Check(subobj)) {
1725 sub = PyString_AS_STRING(subobj);
1726 sub_len = PyString_GET_SIZE(subobj);
1727 }
1728 #ifdef Py_USING_UNICODE
1729 else if (PyUnicode_Check(subobj))
1730 return PyUnicode_Find(
1731 (PyObject *)self, subobj, start, end, dir);
1732 #endif
1733 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1734 /* XXX - the "expected a character buffer object" is pretty
1735 confusing for a non-expert. remap to something else ? */
1736 return -2;
1737
1738 if (dir > 0)
1739 return stringlib_find_slice(
1740 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1741 sub, sub_len, start, end);
1742 else
1743 return stringlib_rfind_slice(
1744 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1745 sub, sub_len, start, end);
1746 }
1747
1748
1749 PyDoc_STRVAR(find__doc__,
1750 "S.find(sub [,start [,end]]) -> int\n\
1751 \n\
1752 Return the lowest index in S where substring sub is found,\n\
1753 such that sub is contained within S[start:end]. Optional\n\
1754 arguments start and end are interpreted as in slice notation.\n\
1755 \n\
1756 Return -1 on failure.");
1757
1758 static PyObject *
string_find(PyStringObject * self,PyObject * args)1759 string_find(PyStringObject *self, PyObject *args)
1760 {
1761 Py_ssize_t result = string_find_internal(self, args, +1);
1762 if (result == -2)
1763 return NULL;
1764 return PyInt_FromSsize_t(result);
1765 }
1766
1767
1768 PyDoc_STRVAR(index__doc__,
1769 "S.index(sub [,start [,end]]) -> int\n\
1770 \n\
1771 Like S.find() but raise ValueError when the substring is not found.");
1772
1773 static PyObject *
string_index(PyStringObject * self,PyObject * args)1774 string_index(PyStringObject *self, PyObject *args)
1775 {
1776 Py_ssize_t result = string_find_internal(self, args, +1);
1777 if (result == -2)
1778 return NULL;
1779 if (result == -1) {
1780 PyErr_SetString(PyExc_ValueError,
1781 "substring not found");
1782 return NULL;
1783 }
1784 return PyInt_FromSsize_t(result);
1785 }
1786
1787
1788 PyDoc_STRVAR(rfind__doc__,
1789 "S.rfind(sub [,start [,end]]) -> int\n\
1790 \n\
1791 Return the highest index in S where substring sub is found,\n\
1792 such that sub is contained within S[start:end]. Optional\n\
1793 arguments start and end are interpreted as in slice notation.\n\
1794 \n\
1795 Return -1 on failure.");
1796
1797 static PyObject *
string_rfind(PyStringObject * self,PyObject * args)1798 string_rfind(PyStringObject *self, PyObject *args)
1799 {
1800 Py_ssize_t result = string_find_internal(self, args, -1);
1801 if (result == -2)
1802 return NULL;
1803 return PyInt_FromSsize_t(result);
1804 }
1805
1806
1807 PyDoc_STRVAR(rindex__doc__,
1808 "S.rindex(sub [,start [,end]]) -> int\n\
1809 \n\
1810 Like S.rfind() but raise ValueError when the substring is not found.");
1811
1812 static PyObject *
string_rindex(PyStringObject * self,PyObject * args)1813 string_rindex(PyStringObject *self, PyObject *args)
1814 {
1815 Py_ssize_t result = string_find_internal(self, args, -1);
1816 if (result == -2)
1817 return NULL;
1818 if (result == -1) {
1819 PyErr_SetString(PyExc_ValueError,
1820 "substring not found");
1821 return NULL;
1822 }
1823 return PyInt_FromSsize_t(result);
1824 }
1825
1826
1827 Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyStringObject * self,int striptype,PyObject * sepobj)1828 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1829 {
1830 char *s = PyString_AS_STRING(self);
1831 Py_ssize_t len = PyString_GET_SIZE(self);
1832 char *sep = PyString_AS_STRING(sepobj);
1833 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1834 Py_ssize_t i, j;
1835
1836 i = 0;
1837 if (striptype != RIGHTSTRIP) {
1838 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1839 i++;
1840 }
1841 }
1842
1843 j = len;
1844 if (striptype != LEFTSTRIP) {
1845 do {
1846 j--;
1847 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1848 j++;
1849 }
1850
1851 if (i == 0 && j == len && PyString_CheckExact(self)) {
1852 Py_INCREF(self);
1853 return (PyObject*)self;
1854 }
1855 else
1856 return PyString_FromStringAndSize(s+i, j-i);
1857 }
1858
1859
1860 Py_LOCAL_INLINE(PyObject *)
do_strip(PyStringObject * self,int striptype)1861 do_strip(PyStringObject *self, int striptype)
1862 {
1863 char *s = PyString_AS_STRING(self);
1864 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
1865
1866 i = 0;
1867 if (striptype != RIGHTSTRIP) {
1868 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1869 i++;
1870 }
1871 }
1872
1873 j = len;
1874 if (striptype != LEFTSTRIP) {
1875 do {
1876 j--;
1877 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1878 j++;
1879 }
1880
1881 if (i == 0 && j == len && PyString_CheckExact(self)) {
1882 Py_INCREF(self);
1883 return (PyObject*)self;
1884 }
1885 else
1886 return PyString_FromStringAndSize(s+i, j-i);
1887 }
1888
1889
1890 Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyStringObject * self,int striptype,PyObject * args)1891 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1892 {
1893 PyObject *sep = NULL;
1894
1895 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1896 return NULL;
1897
1898 if (sep != NULL && sep != Py_None) {
1899 if (PyString_Check(sep))
1900 return do_xstrip(self, striptype, sep);
1901 #ifdef Py_USING_UNICODE
1902 else if (PyUnicode_Check(sep)) {
1903 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1904 PyObject *res;
1905 if (uniself==NULL)
1906 return NULL;
1907 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1908 striptype, sep);
1909 Py_DECREF(uniself);
1910 return res;
1911 }
1912 #endif
1913 PyErr_Format(PyExc_TypeError,
1914 #ifdef Py_USING_UNICODE
1915 "%s arg must be None, str or unicode",
1916 #else
1917 "%s arg must be None or str",
1918 #endif
1919 STRIPNAME(striptype));
1920 return NULL;
1921 }
1922
1923 return do_strip(self, striptype);
1924 }
1925
1926
1927 PyDoc_STRVAR(strip__doc__,
1928 "S.strip([chars]) -> string or unicode\n\
1929 \n\
1930 Return a copy of the string S with leading and trailing\n\
1931 whitespace removed.\n\
1932 If chars is given and not None, remove characters in chars instead.\n\
1933 If chars is unicode, S will be converted to unicode before stripping");
1934
1935 static PyObject *
string_strip(PyStringObject * self,PyObject * args)1936 string_strip(PyStringObject *self, PyObject *args)
1937 {
1938 if (PyTuple_GET_SIZE(args) == 0)
1939 return do_strip(self, BOTHSTRIP); /* Common case */
1940 else
1941 return do_argstrip(self, BOTHSTRIP, args);
1942 }
1943
1944
1945 PyDoc_STRVAR(lstrip__doc__,
1946 "S.lstrip([chars]) -> string or unicode\n\
1947 \n\
1948 Return a copy of the string S with leading whitespace removed.\n\
1949 If chars is given and not None, remove characters in chars instead.\n\
1950 If chars is unicode, S will be converted to unicode before stripping");
1951
1952 static PyObject *
string_lstrip(PyStringObject * self,PyObject * args)1953 string_lstrip(PyStringObject *self, PyObject *args)
1954 {
1955 if (PyTuple_GET_SIZE(args) == 0)
1956 return do_strip(self, LEFTSTRIP); /* Common case */
1957 else
1958 return do_argstrip(self, LEFTSTRIP, args);
1959 }
1960
1961
1962 PyDoc_STRVAR(rstrip__doc__,
1963 "S.rstrip([chars]) -> string or unicode\n\
1964 \n\
1965 Return a copy of the string S with trailing whitespace removed.\n\
1966 If chars is given and not None, remove characters in chars instead.\n\
1967 If chars is unicode, S will be converted to unicode before stripping");
1968
1969 static PyObject *
string_rstrip(PyStringObject * self,PyObject * args)1970 string_rstrip(PyStringObject *self, PyObject *args)
1971 {
1972 if (PyTuple_GET_SIZE(args) == 0)
1973 return do_strip(self, RIGHTSTRIP); /* Common case */
1974 else
1975 return do_argstrip(self, RIGHTSTRIP, args);
1976 }
1977
1978
1979 PyDoc_STRVAR(lower__doc__,
1980 "S.lower() -> string\n\
1981 \n\
1982 Return a copy of the string S converted to lowercase.");
1983
1984 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1985 #ifndef _tolower
1986 #define _tolower tolower
1987 #endif
1988
1989 static PyObject *
string_lower(PyStringObject * self)1990 string_lower(PyStringObject *self)
1991 {
1992 char *s;
1993 Py_ssize_t i, n = PyString_GET_SIZE(self);
1994 PyObject *newobj;
1995
1996 newobj = PyString_FromStringAndSize(NULL, n);
1997 if (!newobj)
1998 return NULL;
1999
2000 s = PyString_AS_STRING(newobj);
2001
2002 Py_MEMCPY(s, PyString_AS_STRING(self), n);
2003
2004 for (i = 0; i < n; i++) {
2005 int c = Py_CHARMASK(s[i]);
2006 if (isupper(c))
2007 s[i] = _tolower(c);
2008 }
2009
2010 return newobj;
2011 }
2012
2013 PyDoc_STRVAR(upper__doc__,
2014 "S.upper() -> string\n\
2015 \n\
2016 Return a copy of the string S converted to uppercase.");
2017
2018 #ifndef _toupper
2019 #define _toupper toupper
2020 #endif
2021
2022 static PyObject *
string_upper(PyStringObject * self)2023 string_upper(PyStringObject *self)
2024 {
2025 char *s;
2026 Py_ssize_t i, n = PyString_GET_SIZE(self);
2027 PyObject *newobj;
2028
2029 newobj = PyString_FromStringAndSize(NULL, n);
2030 if (!newobj)
2031 return NULL;
2032
2033 s = PyString_AS_STRING(newobj);
2034
2035 Py_MEMCPY(s, PyString_AS_STRING(self), n);
2036
2037 for (i = 0; i < n; i++) {
2038 int c = Py_CHARMASK(s[i]);
2039 if (islower(c))
2040 s[i] = _toupper(c);
2041 }
2042
2043 return newobj;
2044 }
2045
2046 PyDoc_STRVAR(title__doc__,
2047 "S.title() -> string\n\
2048 \n\
2049 Return a titlecased version of S, i.e. words start with uppercase\n\
2050 characters, all remaining cased characters have lowercase.");
2051
2052 static PyObject*
string_title(PyStringObject * self)2053 string_title(PyStringObject *self)
2054 {
2055 char *s = PyString_AS_STRING(self), *s_new;
2056 Py_ssize_t i, n = PyString_GET_SIZE(self);
2057 int previous_is_cased = 0;
2058 PyObject *newobj;
2059
2060 newobj = PyString_FromStringAndSize(NULL, n);
2061 if (newobj == NULL)
2062 return NULL;
2063 s_new = PyString_AsString(newobj);
2064 for (i = 0; i < n; i++) {
2065 int c = Py_CHARMASK(*s++);
2066 if (islower(c)) {
2067 if (!previous_is_cased)
2068 c = toupper(c);
2069 previous_is_cased = 1;
2070 } else if (isupper(c)) {
2071 if (previous_is_cased)
2072 c = tolower(c);
2073 previous_is_cased = 1;
2074 } else
2075 previous_is_cased = 0;
2076 *s_new++ = c;
2077 }
2078 return newobj;
2079 }
2080
2081 PyDoc_STRVAR(capitalize__doc__,
2082 "S.capitalize() -> string\n\
2083 \n\
2084 Return a copy of the string S with only its first character\n\
2085 capitalized.");
2086
2087 static PyObject *
string_capitalize(PyStringObject * self)2088 string_capitalize(PyStringObject *self)
2089 {
2090 char *s = PyString_AS_STRING(self), *s_new;
2091 Py_ssize_t i, n = PyString_GET_SIZE(self);
2092 PyObject *newobj;
2093
2094 newobj = PyString_FromStringAndSize(NULL, n);
2095 if (newobj == NULL)
2096 return NULL;
2097 s_new = PyString_AsString(newobj);
2098 if (0 < n) {
2099 int c = Py_CHARMASK(*s++);
2100 if (islower(c))
2101 *s_new = toupper(c);
2102 else
2103 *s_new = c;
2104 s_new++;
2105 }
2106 for (i = 1; i < n; i++) {
2107 int c = Py_CHARMASK(*s++);
2108 if (isupper(c))
2109 *s_new = tolower(c);
2110 else
2111 *s_new = c;
2112 s_new++;
2113 }
2114 return newobj;
2115 }
2116
2117
2118 PyDoc_STRVAR(count__doc__,
2119 "S.count(sub[, start[, end]]) -> int\n\
2120 \n\
2121 Return the number of non-overlapping occurrences of substring sub in\n\
2122 string S[start:end]. Optional arguments start and end are interpreted\n\
2123 as in slice notation.");
2124
2125 static PyObject *
string_count(PyStringObject * self,PyObject * args)2126 string_count(PyStringObject *self, PyObject *args)
2127 {
2128 PyObject *sub_obj;
2129 const char *str = PyString_AS_STRING(self), *sub;
2130 Py_ssize_t sub_len;
2131 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2132
2133 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
2134 return NULL;
2135
2136 if (PyString_Check(sub_obj)) {
2137 sub = PyString_AS_STRING(sub_obj);
2138 sub_len = PyString_GET_SIZE(sub_obj);
2139 }
2140 #ifdef Py_USING_UNICODE
2141 else if (PyUnicode_Check(sub_obj)) {
2142 Py_ssize_t count;
2143 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2144 if (count == -1)
2145 return NULL;
2146 else
2147 return PyInt_FromSsize_t(count);
2148 }
2149 #endif
2150 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2151 return NULL;
2152
2153 ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
2154
2155 return PyInt_FromSsize_t(
2156 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2157 );
2158 }
2159
2160 PyDoc_STRVAR(swapcase__doc__,
2161 "S.swapcase() -> string\n\
2162 \n\
2163 Return a copy of the string S with uppercase characters\n\
2164 converted to lowercase and vice versa.");
2165
2166 static PyObject *
string_swapcase(PyStringObject * self)2167 string_swapcase(PyStringObject *self)
2168 {
2169 char *s = PyString_AS_STRING(self), *s_new;
2170 Py_ssize_t i, n = PyString_GET_SIZE(self);
2171 PyObject *newobj;
2172
2173 newobj = PyString_FromStringAndSize(NULL, n);
2174 if (newobj == NULL)
2175 return NULL;
2176 s_new = PyString_AsString(newobj);
2177 for (i = 0; i < n; i++) {
2178 int c = Py_CHARMASK(*s++);
2179 if (islower(c)) {
2180 *s_new = toupper(c);
2181 }
2182 else if (isupper(c)) {
2183 *s_new = tolower(c);
2184 }
2185 else
2186 *s_new = c;
2187 s_new++;
2188 }
2189 return newobj;
2190 }
2191
2192
2193 PyDoc_STRVAR(translate__doc__,
2194 "S.translate(table [,deletechars]) -> string\n\
2195 \n\
2196 Return a copy of the string S, where all characters occurring\n\
2197 in the optional argument deletechars are removed, and the\n\
2198 remaining characters have been mapped through the given\n\
2199 translation table, which must be a string of length 256 or None.\n\
2200 If the table argument is None, no translation is applied and\n\
2201 the operation simply removes the characters in deletechars.");
2202
2203 static PyObject *
string_translate(PyStringObject * self,PyObject * args)2204 string_translate(PyStringObject *self, PyObject *args)
2205 {
2206 register char *input, *output;
2207 const char *table;
2208 register Py_ssize_t i, c, changed = 0;
2209 PyObject *input_obj = (PyObject*)self;
2210 const char *output_start, *del_table=NULL;
2211 Py_ssize_t inlen, tablen, dellen = 0;
2212 PyObject *result;
2213 int trans_table[256];
2214 PyObject *tableobj, *delobj = NULL;
2215
2216 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2217 &tableobj, &delobj))
2218 return NULL;
2219
2220 if (PyString_Check(tableobj)) {
2221 table = PyString_AS_STRING(tableobj);
2222 tablen = PyString_GET_SIZE(tableobj);
2223 }
2224 else if (tableobj == Py_None) {
2225 table = NULL;
2226 tablen = 256;
2227 }
2228 #ifdef Py_USING_UNICODE
2229 else if (PyUnicode_Check(tableobj)) {
2230 /* Unicode .translate() does not support the deletechars
2231 parameter; instead a mapping to None will cause characters
2232 to be deleted. */
2233 if (delobj != NULL) {
2234 PyErr_SetString(PyExc_TypeError,
2235 "deletions are implemented differently for unicode");
2236 return NULL;
2237 }
2238 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2239 }
2240 #endif
2241 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2242 return NULL;
2243
2244 if (tablen != 256) {
2245 PyErr_SetString(PyExc_ValueError,
2246 "translation table must be 256 characters long");
2247 return NULL;
2248 }
2249
2250 if (delobj != NULL) {
2251 if (PyString_Check(delobj)) {
2252 del_table = PyString_AS_STRING(delobj);
2253 dellen = PyString_GET_SIZE(delobj);
2254 }
2255 #ifdef Py_USING_UNICODE
2256 else if (PyUnicode_Check(delobj)) {
2257 PyErr_SetString(PyExc_TypeError,
2258 "deletions are implemented differently for unicode");
2259 return NULL;
2260 }
2261 #endif
2262 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2263 return NULL;
2264 }
2265 else {
2266 del_table = NULL;
2267 dellen = 0;
2268 }
2269
2270 inlen = PyString_GET_SIZE(input_obj);
2271 result = PyString_FromStringAndSize((char *)NULL, inlen);
2272 if (result == NULL)
2273 return NULL;
2274 output_start = output = PyString_AsString(result);
2275 input = PyString_AS_STRING(input_obj);
2276
2277 if (dellen == 0 && table != NULL) {
2278 /* If no deletions are required, use faster code */
2279 for (i = inlen; --i >= 0; ) {
2280 c = Py_CHARMASK(*input++);
2281 if (Py_CHARMASK((*output++ = table[c])) != c)
2282 changed = 1;
2283 }
2284 if (changed || !PyString_CheckExact(input_obj))
2285 return result;
2286 Py_DECREF(result);
2287 Py_INCREF(input_obj);
2288 return input_obj;
2289 }
2290
2291 if (table == NULL) {
2292 for (i = 0; i < 256; i++)
2293 trans_table[i] = Py_CHARMASK(i);
2294 } else {
2295 for (i = 0; i < 256; i++)
2296 trans_table[i] = Py_CHARMASK(table[i]);
2297 }
2298
2299 for (i = 0; i < dellen; i++)
2300 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2301
2302 for (i = inlen; --i >= 0; ) {
2303 c = Py_CHARMASK(*input++);
2304 if (trans_table[c] != -1)
2305 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2306 continue;
2307 changed = 1;
2308 }
2309 if (!changed && PyString_CheckExact(input_obj)) {
2310 Py_DECREF(result);
2311 Py_INCREF(input_obj);
2312 return input_obj;
2313 }
2314 /* Fix the size of the resulting string */
2315 if (inlen > 0 && _PyString_Resize(&result, output - output_start))
2316 return NULL;
2317 return result;
2318 }
2319
2320
2321 /* find and count characters and substrings */
2322
/* Return a pointer to the first byte equal to c within the first
   target_len bytes of target, or NULL if there is none.  Thin wrapper
   around memchr() that yields a char pointer.  Every macro parameter is
   parenthesized in the expansion. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2325
2326 /* String ops must return a string. */
2327 /* If the object is subclass of string, create a copy */
2328 Py_LOCAL(PyStringObject *)
return_self(PyStringObject * self)2329 return_self(PyStringObject *self)
2330 {
2331 if (PyString_CheckExact(self)) {
2332 Py_INCREF(self);
2333 return self;
2334 }
2335 return (PyStringObject *)PyString_FromStringAndSize(
2336 PyString_AS_STRING(self),
2337 PyString_GET_SIZE(self));
2338 }
2339
2340 Py_LOCAL_INLINE(Py_ssize_t)
countchar(const char * target,Py_ssize_t target_len,char c,Py_ssize_t maxcount)2341 countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
2342 {
2343 Py_ssize_t count=0;
2344 const char *start=target;
2345 const char *end=target+target_len;
2346
2347 while ( (start=findchar(start, end-start, c)) != NULL ) {
2348 count++;
2349 if (count >= maxcount)
2350 break;
2351 start += 1;
2352 }
2353 return count;
2354 }
2355
2356
2357 /* Algorithms for different cases of string replacement */
2358
2359 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2360 Py_LOCAL(PyStringObject *)
replace_interleave(PyStringObject * self,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2361 replace_interleave(PyStringObject *self,
2362 const char *to_s, Py_ssize_t to_len,
2363 Py_ssize_t maxcount)
2364 {
2365 char *self_s, *result_s;
2366 Py_ssize_t self_len, result_len;
2367 Py_ssize_t count, i;
2368 PyStringObject *result;
2369
2370 self_len = PyString_GET_SIZE(self);
2371
2372 /* 1 at the end plus 1 after every character;
2373 count = min(maxcount, self_len + 1) */
2374 if (maxcount <= self_len) {
2375 count = maxcount;
2376 }
2377 else {
2378 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2379 count = self_len + 1;
2380 }
2381
2382 /* Check for overflow */
2383 /* result_len = count * to_len + self_len; */
2384 assert(count > 0);
2385 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
2386 PyErr_SetString(PyExc_OverflowError,
2387 "replace string is too long");
2388 return NULL;
2389 }
2390 result_len = count * to_len + self_len;
2391 if (! (result = (PyStringObject *)
2392 PyString_FromStringAndSize(NULL, result_len)) )
2393 return NULL;
2394
2395 self_s = PyString_AS_STRING(self);
2396 result_s = PyString_AS_STRING(result);
2397
2398 /* TODO: special case single character, which doesn't need memcpy */
2399
2400 /* Lay the first one down (guaranteed this will occur) */
2401 Py_MEMCPY(result_s, to_s, to_len);
2402 result_s += to_len;
2403 count -= 1;
2404
2405 for (i=0; i<count; i++) {
2406 *result_s++ = *self_s++;
2407 Py_MEMCPY(result_s, to_s, to_len);
2408 result_s += to_len;
2409 }
2410
2411 /* Copy the rest of the original string */
2412 Py_MEMCPY(result_s, self_s, self_len-i);
2413
2414 return result;
2415 }
2416
2417 /* Special case for deleting a single character */
2418 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2419 Py_LOCAL(PyStringObject *)
replace_delete_single_character(PyStringObject * self,char from_c,Py_ssize_t maxcount)2420 replace_delete_single_character(PyStringObject *self,
2421 char from_c, Py_ssize_t maxcount)
2422 {
2423 char *self_s, *result_s;
2424 char *start, *next, *end;
2425 Py_ssize_t self_len, result_len;
2426 Py_ssize_t count;
2427 PyStringObject *result;
2428
2429 self_len = PyString_GET_SIZE(self);
2430 self_s = PyString_AS_STRING(self);
2431
2432 count = countchar(self_s, self_len, from_c, maxcount);
2433 if (count == 0) {
2434 return return_self(self);
2435 }
2436
2437 result_len = self_len - count; /* from_len == 1 */
2438 assert(result_len>=0);
2439
2440 if ( (result = (PyStringObject *)
2441 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2442 return NULL;
2443 result_s = PyString_AS_STRING(result);
2444
2445 start = self_s;
2446 end = self_s + self_len;
2447 while (count-- > 0) {
2448 next = findchar(start, end-start, from_c);
2449 if (next == NULL)
2450 break;
2451 Py_MEMCPY(result_s, start, next-start);
2452 result_s += (next-start);
2453 start = next+1;
2454 }
2455 Py_MEMCPY(result_s, start, end-start);
2456
2457 return result;
2458 }
2459
2460 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2461
2462 Py_LOCAL(PyStringObject *)
replace_delete_substring(PyStringObject * self,const char * from_s,Py_ssize_t from_len,Py_ssize_t maxcount)2463 replace_delete_substring(PyStringObject *self,
2464 const char *from_s, Py_ssize_t from_len,
2465 Py_ssize_t maxcount) {
2466 char *self_s, *result_s;
2467 char *start, *next, *end;
2468 Py_ssize_t self_len, result_len;
2469 Py_ssize_t count, offset;
2470 PyStringObject *result;
2471
2472 self_len = PyString_GET_SIZE(self);
2473 self_s = PyString_AS_STRING(self);
2474
2475 count = stringlib_count(self_s, self_len,
2476 from_s, from_len,
2477 maxcount);
2478
2479 if (count == 0) {
2480 /* no matches */
2481 return return_self(self);
2482 }
2483
2484 result_len = self_len - (count * from_len);
2485 assert (result_len>=0);
2486
2487 if ( (result = (PyStringObject *)
2488 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2489 return NULL;
2490
2491 result_s = PyString_AS_STRING(result);
2492
2493 start = self_s;
2494 end = self_s + self_len;
2495 while (count-- > 0) {
2496 offset = stringlib_find(start, end-start,
2497 from_s, from_len,
2498 0);
2499 if (offset == -1)
2500 break;
2501 next = start + offset;
2502
2503 Py_MEMCPY(result_s, start, next-start);
2504
2505 result_s += (next-start);
2506 start = next+from_len;
2507 }
2508 Py_MEMCPY(result_s, start, end-start);
2509 return result;
2510 }
2511
2512 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2513 Py_LOCAL(PyStringObject *)
replace_single_character_in_place(PyStringObject * self,char from_c,char to_c,Py_ssize_t maxcount)2514 replace_single_character_in_place(PyStringObject *self,
2515 char from_c, char to_c,
2516 Py_ssize_t maxcount)
2517 {
2518 char *self_s, *result_s, *start, *end, *next;
2519 Py_ssize_t self_len;
2520 PyStringObject *result;
2521
2522 /* The result string will be the same size */
2523 self_s = PyString_AS_STRING(self);
2524 self_len = PyString_GET_SIZE(self);
2525
2526 next = findchar(self_s, self_len, from_c);
2527
2528 if (next == NULL) {
2529 /* No matches; return the original string */
2530 return return_self(self);
2531 }
2532
2533 /* Need to make a new string */
2534 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2535 if (result == NULL)
2536 return NULL;
2537 result_s = PyString_AS_STRING(result);
2538 Py_MEMCPY(result_s, self_s, self_len);
2539
2540 /* change everything in-place, starting with this one */
2541 start = result_s + (next-self_s);
2542 *start = to_c;
2543 start++;
2544 end = result_s + self_len;
2545
2546 while (--maxcount > 0) {
2547 next = findchar(start, end-start, from_c);
2548 if (next == NULL)
2549 break;
2550 *next = to_c;
2551 start = next+1;
2552 }
2553
2554 return result;
2555 }
2556
2557 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2558 Py_LOCAL(PyStringObject *)
replace_substring_in_place(PyStringObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2559 replace_substring_in_place(PyStringObject *self,
2560 const char *from_s, Py_ssize_t from_len,
2561 const char *to_s, Py_ssize_t to_len,
2562 Py_ssize_t maxcount)
2563 {
2564 char *result_s, *start, *end;
2565 char *self_s;
2566 Py_ssize_t self_len, offset;
2567 PyStringObject *result;
2568
2569 /* The result string will be the same size */
2570
2571 self_s = PyString_AS_STRING(self);
2572 self_len = PyString_GET_SIZE(self);
2573
2574 offset = stringlib_find(self_s, self_len,
2575 from_s, from_len,
2576 0);
2577 if (offset == -1) {
2578 /* No matches; return the original string */
2579 return return_self(self);
2580 }
2581
2582 /* Need to make a new string */
2583 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2584 if (result == NULL)
2585 return NULL;
2586 result_s = PyString_AS_STRING(result);
2587 Py_MEMCPY(result_s, self_s, self_len);
2588
2589 /* change everything in-place, starting with this one */
2590 start = result_s + offset;
2591 Py_MEMCPY(start, to_s, from_len);
2592 start += from_len;
2593 end = result_s + self_len;
2594
2595 while ( --maxcount > 0) {
2596 offset = stringlib_find(start, end-start,
2597 from_s, from_len,
2598 0);
2599 if (offset==-1)
2600 break;
2601 Py_MEMCPY(start+offset, to_s, from_len);
2602 start += offset+from_len;
2603 }
2604
2605 return result;
2606 }
2607
2608 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2609 Py_LOCAL(PyStringObject *)
replace_single_character(PyStringObject * self,char from_c,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2610 replace_single_character(PyStringObject *self,
2611 char from_c,
2612 const char *to_s, Py_ssize_t to_len,
2613 Py_ssize_t maxcount)
2614 {
2615 char *self_s, *result_s;
2616 char *start, *next, *end;
2617 Py_ssize_t self_len, result_len;
2618 Py_ssize_t count;
2619 PyStringObject *result;
2620
2621 self_s = PyString_AS_STRING(self);
2622 self_len = PyString_GET_SIZE(self);
2623
2624 count = countchar(self_s, self_len, from_c, maxcount);
2625 if (count == 0) {
2626 /* no matches, return unchanged */
2627 return return_self(self);
2628 }
2629
2630 /* use the difference between current and new, hence the "-1" */
2631 /* result_len = self_len + count * (to_len-1) */
2632 assert(count > 0);
2633 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
2634 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2635 return NULL;
2636 }
2637 result_len = self_len + count * (to_len - 1);
2638
2639 if ( (result = (PyStringObject *)
2640 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2641 return NULL;
2642 result_s = PyString_AS_STRING(result);
2643
2644 start = self_s;
2645 end = self_s + self_len;
2646 while (count-- > 0) {
2647 next = findchar(start, end-start, from_c);
2648 if (next == NULL)
2649 break;
2650
2651 if (next == start) {
2652 /* replace with the 'to' */
2653 Py_MEMCPY(result_s, to_s, to_len);
2654 result_s += to_len;
2655 start += 1;
2656 } else {
2657 /* copy the unchanged old then the 'to' */
2658 Py_MEMCPY(result_s, start, next-start);
2659 result_s += (next-start);
2660 Py_MEMCPY(result_s, to_s, to_len);
2661 result_s += to_len;
2662 start = next+1;
2663 }
2664 }
2665 /* Copy the remainder of the remaining string */
2666 Py_MEMCPY(result_s, start, end-start);
2667
2668 return result;
2669 }
2670
2671 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2672 Py_LOCAL(PyStringObject *)
replace_substring(PyStringObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2673 replace_substring(PyStringObject *self,
2674 const char *from_s, Py_ssize_t from_len,
2675 const char *to_s, Py_ssize_t to_len,
2676 Py_ssize_t maxcount) {
2677 char *self_s, *result_s;
2678 char *start, *next, *end;
2679 Py_ssize_t self_len, result_len;
2680 Py_ssize_t count, offset;
2681 PyStringObject *result;
2682
2683 self_s = PyString_AS_STRING(self);
2684 self_len = PyString_GET_SIZE(self);
2685
2686 count = stringlib_count(self_s, self_len,
2687 from_s, from_len,
2688 maxcount);
2689
2690 if (count == 0) {
2691 /* no matches, return unchanged */
2692 return return_self(self);
2693 }
2694
2695 /* Check for overflow */
2696 /* result_len = self_len + count * (to_len-from_len) */
2697 assert(count > 0);
2698 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
2699 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2700 return NULL;
2701 }
2702 result_len = self_len + count * (to_len - from_len);
2703
2704 if ( (result = (PyStringObject *)
2705 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2706 return NULL;
2707 result_s = PyString_AS_STRING(result);
2708
2709 start = self_s;
2710 end = self_s + self_len;
2711 while (count-- > 0) {
2712 offset = stringlib_find(start, end-start,
2713 from_s, from_len,
2714 0);
2715 if (offset == -1)
2716 break;
2717 next = start+offset;
2718 if (next == start) {
2719 /* replace with the 'to' */
2720 Py_MEMCPY(result_s, to_s, to_len);
2721 result_s += to_len;
2722 start += from_len;
2723 } else {
2724 /* copy the unchanged old then the 'to' */
2725 Py_MEMCPY(result_s, start, next-start);
2726 result_s += (next-start);
2727 Py_MEMCPY(result_s, to_s, to_len);
2728 result_s += to_len;
2729 start = next+from_len;
2730 }
2731 }
2732 /* Copy the remainder of the remaining string */
2733 Py_MEMCPY(result_s, start, end-start);
2734
2735 return result;
2736 }
2737
2738
2739 Py_LOCAL(PyStringObject *)
replace(PyStringObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2740 replace(PyStringObject *self,
2741 const char *from_s, Py_ssize_t from_len,
2742 const char *to_s, Py_ssize_t to_len,
2743 Py_ssize_t maxcount)
2744 {
2745 if (maxcount < 0) {
2746 maxcount = PY_SSIZE_T_MAX;
2747 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2748 /* nothing to do; return the original string */
2749 return return_self(self);
2750 }
2751
2752 if (maxcount == 0 ||
2753 (from_len == 0 && to_len == 0)) {
2754 /* nothing to do; return the original string */
2755 return return_self(self);
2756 }
2757
2758 /* Handle zero-length special cases */
2759
2760 if (from_len == 0) {
2761 /* insert the 'to' string everywhere. */
2762 /* >>> "Python".replace("", ".") */
2763 /* '.P.y.t.h.o.n.' */
2764 return replace_interleave(self, to_s, to_len, maxcount);
2765 }
2766
2767 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2768 /* point for an empty self string to generate a non-empty string */
2769 /* Special case so the remaining code always gets a non-empty string */
2770 if (PyString_GET_SIZE(self) == 0) {
2771 return return_self(self);
2772 }
2773
2774 if (to_len == 0) {
2775 /* delete all occurrences of 'from' string */
2776 if (from_len == 1) {
2777 return replace_delete_single_character(
2778 self, from_s[0], maxcount);
2779 } else {
2780 return replace_delete_substring(self, from_s, from_len, maxcount);
2781 }
2782 }
2783
2784 /* Handle special case where both strings have the same length */
2785
2786 if (from_len == to_len) {
2787 if (from_len == 1) {
2788 return replace_single_character_in_place(
2789 self,
2790 from_s[0],
2791 to_s[0],
2792 maxcount);
2793 } else {
2794 return replace_substring_in_place(
2795 self, from_s, from_len, to_s, to_len, maxcount);
2796 }
2797 }
2798
2799 /* Otherwise use the more generic algorithms */
2800 if (from_len == 1) {
2801 return replace_single_character(self, from_s[0],
2802 to_s, to_len, maxcount);
2803 } else {
2804 /* len('from')>=2, len('to')>=1 */
2805 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2806 }
2807 }
2808
2809 PyDoc_STRVAR(replace__doc__,
2810 "S.replace(old, new[, count]) -> string\n\
2811 \n\
2812 Return a copy of string S with all occurrences of substring\n\
2813 old replaced by new. If the optional argument count is\n\
2814 given, only the first count occurrences are replaced.");
2815
2816 static PyObject *
string_replace(PyStringObject * self,PyObject * args)2817 string_replace(PyStringObject *self, PyObject *args)
2818 {
2819 Py_ssize_t count = -1;
2820 PyObject *from, *to;
2821 const char *from_s, *to_s;
2822 Py_ssize_t from_len, to_len;
2823
2824 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2825 return NULL;
2826
2827 if (PyString_Check(from)) {
2828 from_s = PyString_AS_STRING(from);
2829 from_len = PyString_GET_SIZE(from);
2830 }
2831 #ifdef Py_USING_UNICODE
2832 if (PyUnicode_Check(from))
2833 return PyUnicode_Replace((PyObject *)self,
2834 from, to, count);
2835 #endif
2836 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2837 return NULL;
2838
2839 if (PyString_Check(to)) {
2840 to_s = PyString_AS_STRING(to);
2841 to_len = PyString_GET_SIZE(to);
2842 }
2843 #ifdef Py_USING_UNICODE
2844 else if (PyUnicode_Check(to))
2845 return PyUnicode_Replace((PyObject *)self,
2846 from, to, count);
2847 #endif
2848 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2849 return NULL;
2850
2851 return (PyObject *)replace((PyStringObject *) self,
2852 from_s, from_len,
2853 to_s, to_len, count);
2854 }
2855
2856 /** End DALKE **/
2857
2858 /* Matches the end (direction >= 0) or start (direction < 0) of self
2859 * against substr, using the start and end arguments. Returns
2860 * -1 on error, 0 if not found and 1 if found.
2861 */
2862 Py_LOCAL(int)
_string_tailmatch(PyStringObject * self,PyObject * substr,Py_ssize_t start,Py_ssize_t end,int direction)2863 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2864 Py_ssize_t end, int direction)
2865 {
2866 Py_ssize_t len = PyString_GET_SIZE(self);
2867 Py_ssize_t slen;
2868 const char* sub;
2869 const char* str;
2870
2871 if (PyString_Check(substr)) {
2872 sub = PyString_AS_STRING(substr);
2873 slen = PyString_GET_SIZE(substr);
2874 }
2875 #ifdef Py_USING_UNICODE
2876 else if (PyUnicode_Check(substr))
2877 return PyUnicode_Tailmatch((PyObject *)self,
2878 substr, start, end, direction);
2879 #endif
2880 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2881 return -1;
2882 str = PyString_AS_STRING(self);
2883
2884 ADJUST_INDICES(start, end, len);
2885
2886 if (direction < 0) {
2887 /* startswith */
2888 if (start+slen > len)
2889 return 0;
2890 } else {
2891 /* endswith */
2892 if (end-start < slen || start > len)
2893 return 0;
2894
2895 if (end-slen > start)
2896 start = end - slen;
2897 }
2898 if (end-start >= slen)
2899 return ! memcmp(str+start, sub, slen);
2900 return 0;
2901 }
2902
2903
2904 PyDoc_STRVAR(startswith__doc__,
2905 "S.startswith(prefix[, start[, end]]) -> bool\n\
2906 \n\
2907 Return True if S starts with the specified prefix, False otherwise.\n\
2908 With optional start, test S beginning at that position.\n\
2909 With optional end, stop comparing S at that position.\n\
2910 prefix can also be a tuple of strings to try.");
2911
2912 static PyObject *
string_startswith(PyStringObject * self,PyObject * args)2913 string_startswith(PyStringObject *self, PyObject *args)
2914 {
2915 Py_ssize_t start = 0;
2916 Py_ssize_t end = PY_SSIZE_T_MAX;
2917 PyObject *subobj;
2918 int result;
2919
2920 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
2921 return NULL;
2922 if (PyTuple_Check(subobj)) {
2923 Py_ssize_t i;
2924 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2925 result = _string_tailmatch(self,
2926 PyTuple_GET_ITEM(subobj, i),
2927 start, end, -1);
2928 if (result == -1)
2929 return NULL;
2930 else if (result) {
2931 Py_RETURN_TRUE;
2932 }
2933 }
2934 Py_RETURN_FALSE;
2935 }
2936 result = _string_tailmatch(self, subobj, start, end, -1);
2937 if (result == -1) {
2938 if (PyErr_ExceptionMatches(PyExc_TypeError))
2939 PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "
2940 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
2941 return NULL;
2942 }
2943 else
2944 return PyBool_FromLong(result);
2945 }
2946
2947
2948 PyDoc_STRVAR(endswith__doc__,
2949 "S.endswith(suffix[, start[, end]]) -> bool\n\
2950 \n\
2951 Return True if S ends with the specified suffix, False otherwise.\n\
2952 With optional start, test S beginning at that position.\n\
2953 With optional end, stop comparing S at that position.\n\
2954 suffix can also be a tuple of strings to try.");
2955
2956 static PyObject *
string_endswith(PyStringObject * self,PyObject * args)2957 string_endswith(PyStringObject *self, PyObject *args)
2958 {
2959 Py_ssize_t start = 0;
2960 Py_ssize_t end = PY_SSIZE_T_MAX;
2961 PyObject *subobj;
2962 int result;
2963
2964 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
2965 return NULL;
2966 if (PyTuple_Check(subobj)) {
2967 Py_ssize_t i;
2968 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2969 result = _string_tailmatch(self,
2970 PyTuple_GET_ITEM(subobj, i),
2971 start, end, +1);
2972 if (result == -1)
2973 return NULL;
2974 else if (result) {
2975 Py_RETURN_TRUE;
2976 }
2977 }
2978 Py_RETURN_FALSE;
2979 }
2980 result = _string_tailmatch(self, subobj, start, end, +1);
2981 if (result == -1) {
2982 if (PyErr_ExceptionMatches(PyExc_TypeError))
2983 PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "
2984 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
2985 return NULL;
2986 }
2987 else
2988 return PyBool_FromLong(result);
2989 }
2990
2991
2992 PyDoc_STRVAR(encode__doc__,
2993 "S.encode([encoding[,errors]]) -> object\n\
2994 \n\
2995 Encodes S using the codec registered for encoding. encoding defaults\n\
2996 to the default encoding. errors may be given to set a different error\n\
2997 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2998 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2999 'xmlcharrefreplace' as well as any other name registered with\n\
3000 codecs.register_error that is able to handle UnicodeEncodeErrors.");
3001
3002 static PyObject *
string_encode(PyStringObject * self,PyObject * args,PyObject * kwargs)3003 string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
3004 {
3005 static char *kwlist[] = {"encoding", "errors", 0};
3006 char *encoding = NULL;
3007 char *errors = NULL;
3008 PyObject *v;
3009
3010 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
3011 kwlist, &encoding, &errors))
3012 return NULL;
3013 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3014 if (v == NULL)
3015 goto onError;
3016 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3017 PyErr_Format(PyExc_TypeError,
3018 "encoder did not return a string/unicode object "
3019 "(type=%.400s)",
3020 Py_TYPE(v)->tp_name);
3021 Py_DECREF(v);
3022 return NULL;
3023 }
3024 return v;
3025
3026 onError:
3027 return NULL;
3028 }
3029
3030
3031 PyDoc_STRVAR(decode__doc__,
3032 "S.decode([encoding[,errors]]) -> object\n\
3033 \n\
3034 Decodes S using the codec registered for encoding. encoding defaults\n\
3035 to the default encoding. errors may be given to set a different error\n\
3036 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3037 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3038 as well as any other name registered with codecs.register_error that is\n\
3039 able to handle UnicodeDecodeErrors.");
3040
3041 static PyObject *
string_decode(PyStringObject * self,PyObject * args,PyObject * kwargs)3042 string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
3043 {
3044 static char *kwlist[] = {"encoding", "errors", 0};
3045 char *encoding = NULL;
3046 char *errors = NULL;
3047 PyObject *v;
3048
3049 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
3050 kwlist, &encoding, &errors))
3051 return NULL;
3052 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3053 if (v == NULL)
3054 goto onError;
3055 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3056 PyErr_Format(PyExc_TypeError,
3057 "decoder did not return a string/unicode object "
3058 "(type=%.400s)",
3059 Py_TYPE(v)->tp_name);
3060 Py_DECREF(v);
3061 return NULL;
3062 }
3063 return v;
3064
3065 onError:
3066 return NULL;
3067 }
3068
3069
3070 PyDoc_STRVAR(expandtabs__doc__,
3071 "S.expandtabs([tabsize]) -> string\n\
3072 \n\
3073 Return a copy of S where all tab characters are expanded using spaces.\n\
3074 If tabsize is not given, a tab size of 8 characters is assumed.");
3075
3076 static PyObject*
string_expandtabs(PyStringObject * self,PyObject * args)3077 string_expandtabs(PyStringObject *self, PyObject *args)
3078 {
3079 const char *e, *p, *qe;
3080 char *q;
3081 Py_ssize_t i, j, incr;
3082 PyObject *u;
3083 int tabsize = 8;
3084
3085 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3086 return NULL;
3087
3088 /* First pass: determine size of output string */
3089 i = 0; /* chars up to and including most recent \n or \r */
3090 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3091 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3092 for (p = PyString_AS_STRING(self); p < e; p++) {
3093 if (*p == '\t') {
3094 if (tabsize > 0) {
3095 incr = tabsize - (j % tabsize);
3096 if (j > PY_SSIZE_T_MAX - incr)
3097 goto overflow1;
3098 j += incr;
3099 }
3100 }
3101 else {
3102 if (j > PY_SSIZE_T_MAX - 1)
3103 goto overflow1;
3104 j++;
3105 if (*p == '\n' || *p == '\r') {
3106 if (i > PY_SSIZE_T_MAX - j)
3107 goto overflow1;
3108 i += j;
3109 j = 0;
3110 }
3111 }
3112 }
3113
3114 if (i > PY_SSIZE_T_MAX - j)
3115 goto overflow1;
3116
3117 /* Second pass: create output string and fill it */
3118 u = PyString_FromStringAndSize(NULL, i + j);
3119 if (!u)
3120 return NULL;
3121
3122 j = 0; /* same as in first pass */
3123 q = PyString_AS_STRING(u); /* next output char */
3124 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
3125
3126 for (p = PyString_AS_STRING(self); p < e; p++) {
3127 if (*p == '\t') {
3128 if (tabsize > 0) {
3129 i = tabsize - (j % tabsize);
3130 j += i;
3131 while (i--) {
3132 if (q >= qe)
3133 goto overflow2;
3134 *q++ = ' ';
3135 }
3136 }
3137 }
3138 else {
3139 if (q >= qe)
3140 goto overflow2;
3141 *q++ = *p;
3142 j++;
3143 if (*p == '\n' || *p == '\r')
3144 j = 0;
3145 }
3146 }
3147
3148 return u;
3149
3150 overflow2:
3151 Py_DECREF(u);
3152 overflow1:
3153 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3154 return NULL;
3155 }
3156
3157 Py_LOCAL_INLINE(PyObject *)
pad(PyStringObject * self,Py_ssize_t left,Py_ssize_t right,char fill)3158 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3159 {
3160 PyObject *u;
3161
3162 if (left < 0)
3163 left = 0;
3164 if (right < 0)
3165 right = 0;
3166
3167 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3168 Py_INCREF(self);
3169 return (PyObject *)self;
3170 }
3171
3172 u = PyString_FromStringAndSize(NULL,
3173 left + PyString_GET_SIZE(self) + right);
3174 if (u) {
3175 if (left)
3176 memset(PyString_AS_STRING(u), fill, left);
3177 Py_MEMCPY(PyString_AS_STRING(u) + left,
3178 PyString_AS_STRING(self),
3179 PyString_GET_SIZE(self));
3180 if (right)
3181 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3182 fill, right);
3183 }
3184
3185 return u;
3186 }
3187
3188 PyDoc_STRVAR(ljust__doc__,
3189 "S.ljust(width[, fillchar]) -> string\n"
3190 "\n"
3191 "Return S left-justified in a string of length width. Padding is\n"
3192 "done using the specified fill character (default is a space).");
3193
3194 static PyObject *
string_ljust(PyStringObject * self,PyObject * args)3195 string_ljust(PyStringObject *self, PyObject *args)
3196 {
3197 Py_ssize_t width;
3198 char fillchar = ' ';
3199
3200 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3201 return NULL;
3202
3203 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3204 Py_INCREF(self);
3205 return (PyObject*) self;
3206 }
3207
3208 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3209 }
3210
3211
3212 PyDoc_STRVAR(rjust__doc__,
3213 "S.rjust(width[, fillchar]) -> string\n"
3214 "\n"
3215 "Return S right-justified in a string of length width. Padding is\n"
3216 "done using the specified fill character (default is a space)");
3217
3218 static PyObject *
string_rjust(PyStringObject * self,PyObject * args)3219 string_rjust(PyStringObject *self, PyObject *args)
3220 {
3221 Py_ssize_t width;
3222 char fillchar = ' ';
3223
3224 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3225 return NULL;
3226
3227 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3228 Py_INCREF(self);
3229 return (PyObject*) self;
3230 }
3231
3232 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3233 }
3234
3235
3236 PyDoc_STRVAR(center__doc__,
3237 "S.center(width[, fillchar]) -> string\n"
3238 "\n"
3239 "Return S centered in a string of length width. Padding is\n"
3240 "done using the specified fill character (default is a space)");
3241
3242 static PyObject *
string_center(PyStringObject * self,PyObject * args)3243 string_center(PyStringObject *self, PyObject *args)
3244 {
3245 Py_ssize_t marg, left;
3246 Py_ssize_t width;
3247 char fillchar = ' ';
3248
3249 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3250 return NULL;
3251
3252 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3253 Py_INCREF(self);
3254 return (PyObject*) self;
3255 }
3256
3257 marg = width - PyString_GET_SIZE(self);
3258 left = marg / 2 + (marg & width & 1);
3259
3260 return pad(self, left, marg - left, fillchar);
3261 }
3262
3263 PyDoc_STRVAR(zfill__doc__,
3264 "S.zfill(width) -> string\n"
3265 "\n"
3266 "Pad a numeric string S with zeros on the left, to fill a field\n"
3267 "of the specified width. The string S is never truncated.");
3268
3269 static PyObject *
string_zfill(PyStringObject * self,PyObject * args)3270 string_zfill(PyStringObject *self, PyObject *args)
3271 {
3272 Py_ssize_t fill;
3273 PyObject *s;
3274 char *p;
3275 Py_ssize_t width;
3276
3277 if (!PyArg_ParseTuple(args, "n:zfill", &width))
3278 return NULL;
3279
3280 if (PyString_GET_SIZE(self) >= width) {
3281 if (PyString_CheckExact(self)) {
3282 Py_INCREF(self);
3283 return (PyObject*) self;
3284 }
3285 else
3286 return PyString_FromStringAndSize(
3287 PyString_AS_STRING(self),
3288 PyString_GET_SIZE(self)
3289 );
3290 }
3291
3292 fill = width - PyString_GET_SIZE(self);
3293
3294 s = pad(self, fill, 0, '0');
3295
3296 if (s == NULL)
3297 return NULL;
3298
3299 p = PyString_AS_STRING(s);
3300 if (p[fill] == '+' || p[fill] == '-') {
3301 /* move sign to beginning of string */
3302 p[0] = p[fill];
3303 p[fill] = '0';
3304 }
3305
3306 return (PyObject*) s;
3307 }
3308
3309 PyDoc_STRVAR(isspace__doc__,
3310 "S.isspace() -> bool\n\
3311 \n\
3312 Return True if all characters in S are whitespace\n\
3313 and there is at least one character in S, False otherwise.");
3314
3315 static PyObject*
string_isspace(PyStringObject * self)3316 string_isspace(PyStringObject *self)
3317 {
3318 register const unsigned char *p
3319 = (unsigned char *) PyString_AS_STRING(self);
3320 register const unsigned char *e;
3321
3322 /* Shortcut for single character strings */
3323 if (PyString_GET_SIZE(self) == 1 &&
3324 isspace(*p))
3325 return PyBool_FromLong(1);
3326
3327 /* Special case for empty strings */
3328 if (PyString_GET_SIZE(self) == 0)
3329 return PyBool_FromLong(0);
3330
3331 e = p + PyString_GET_SIZE(self);
3332 for (; p < e; p++) {
3333 if (!isspace(*p))
3334 return PyBool_FromLong(0);
3335 }
3336 return PyBool_FromLong(1);
3337 }
3338
3339
3340 PyDoc_STRVAR(isalpha__doc__,
3341 "S.isalpha() -> bool\n\
3342 \n\
3343 Return True if all characters in S are alphabetic\n\
3344 and there is at least one character in S, False otherwise.");
3345
3346 static PyObject*
string_isalpha(PyStringObject * self)3347 string_isalpha(PyStringObject *self)
3348 {
3349 register const unsigned char *p
3350 = (unsigned char *) PyString_AS_STRING(self);
3351 register const unsigned char *e;
3352
3353 /* Shortcut for single character strings */
3354 if (PyString_GET_SIZE(self) == 1 &&
3355 isalpha(*p))
3356 return PyBool_FromLong(1);
3357
3358 /* Special case for empty strings */
3359 if (PyString_GET_SIZE(self) == 0)
3360 return PyBool_FromLong(0);
3361
3362 e = p + PyString_GET_SIZE(self);
3363 for (; p < e; p++) {
3364 if (!isalpha(*p))
3365 return PyBool_FromLong(0);
3366 }
3367 return PyBool_FromLong(1);
3368 }
3369
3370
3371 PyDoc_STRVAR(isalnum__doc__,
3372 "S.isalnum() -> bool\n\
3373 \n\
3374 Return True if all characters in S are alphanumeric\n\
3375 and there is at least one character in S, False otherwise.");
3376
3377 static PyObject*
string_isalnum(PyStringObject * self)3378 string_isalnum(PyStringObject *self)
3379 {
3380 register const unsigned char *p
3381 = (unsigned char *) PyString_AS_STRING(self);
3382 register const unsigned char *e;
3383
3384 /* Shortcut for single character strings */
3385 if (PyString_GET_SIZE(self) == 1 &&
3386 isalnum(*p))
3387 return PyBool_FromLong(1);
3388
3389 /* Special case for empty strings */
3390 if (PyString_GET_SIZE(self) == 0)
3391 return PyBool_FromLong(0);
3392
3393 e = p + PyString_GET_SIZE(self);
3394 for (; p < e; p++) {
3395 if (!isalnum(*p))
3396 return PyBool_FromLong(0);
3397 }
3398 return PyBool_FromLong(1);
3399 }
3400
3401
3402 PyDoc_STRVAR(isdigit__doc__,
3403 "S.isdigit() -> bool\n\
3404 \n\
3405 Return True if all characters in S are digits\n\
3406 and there is at least one character in S, False otherwise.");
3407
3408 static PyObject*
string_isdigit(PyStringObject * self)3409 string_isdigit(PyStringObject *self)
3410 {
3411 register const unsigned char *p
3412 = (unsigned char *) PyString_AS_STRING(self);
3413 register const unsigned char *e;
3414
3415 /* Shortcut for single character strings */
3416 if (PyString_GET_SIZE(self) == 1 &&
3417 isdigit(*p))
3418 return PyBool_FromLong(1);
3419
3420 /* Special case for empty strings */
3421 if (PyString_GET_SIZE(self) == 0)
3422 return PyBool_FromLong(0);
3423
3424 e = p + PyString_GET_SIZE(self);
3425 for (; p < e; p++) {
3426 if (!isdigit(*p))
3427 return PyBool_FromLong(0);
3428 }
3429 return PyBool_FromLong(1);
3430 }
3431
3432
3433 PyDoc_STRVAR(islower__doc__,
3434 "S.islower() -> bool\n\
3435 \n\
3436 Return True if all cased characters in S are lowercase and there is\n\
3437 at least one cased character in S, False otherwise.");
3438
3439 static PyObject*
string_islower(PyStringObject * self)3440 string_islower(PyStringObject *self)
3441 {
3442 register const unsigned char *p
3443 = (unsigned char *) PyString_AS_STRING(self);
3444 register const unsigned char *e;
3445 int cased;
3446
3447 /* Shortcut for single character strings */
3448 if (PyString_GET_SIZE(self) == 1)
3449 return PyBool_FromLong(islower(*p) != 0);
3450
3451 /* Special case for empty strings */
3452 if (PyString_GET_SIZE(self) == 0)
3453 return PyBool_FromLong(0);
3454
3455 e = p + PyString_GET_SIZE(self);
3456 cased = 0;
3457 for (; p < e; p++) {
3458 if (isupper(*p))
3459 return PyBool_FromLong(0);
3460 else if (!cased && islower(*p))
3461 cased = 1;
3462 }
3463 return PyBool_FromLong(cased);
3464 }
3465
3466
3467 PyDoc_STRVAR(isupper__doc__,
3468 "S.isupper() -> bool\n\
3469 \n\
3470 Return True if all cased characters in S are uppercase and there is\n\
3471 at least one cased character in S, False otherwise.");
3472
3473 static PyObject*
string_isupper(PyStringObject * self)3474 string_isupper(PyStringObject *self)
3475 {
3476 register const unsigned char *p
3477 = (unsigned char *) PyString_AS_STRING(self);
3478 register const unsigned char *e;
3479 int cased;
3480
3481 /* Shortcut for single character strings */
3482 if (PyString_GET_SIZE(self) == 1)
3483 return PyBool_FromLong(isupper(*p) != 0);
3484
3485 /* Special case for empty strings */
3486 if (PyString_GET_SIZE(self) == 0)
3487 return PyBool_FromLong(0);
3488
3489 e = p + PyString_GET_SIZE(self);
3490 cased = 0;
3491 for (; p < e; p++) {
3492 if (islower(*p))
3493 return PyBool_FromLong(0);
3494 else if (!cased && isupper(*p))
3495 cased = 1;
3496 }
3497 return PyBool_FromLong(cased);
3498 }
3499
3500
3501 PyDoc_STRVAR(istitle__doc__,
3502 "S.istitle() -> bool\n\
3503 \n\
3504 Return True if S is a titlecased string and there is at least one\n\
3505 character in S, i.e. uppercase characters may only follow uncased\n\
3506 characters and lowercase characters only cased ones. Return False\n\
3507 otherwise.");
3508
3509 static PyObject*
string_istitle(PyStringObject * self,PyObject * uncased)3510 string_istitle(PyStringObject *self, PyObject *uncased)
3511 {
3512 register const unsigned char *p
3513 = (unsigned char *) PyString_AS_STRING(self);
3514 register const unsigned char *e;
3515 int cased, previous_is_cased;
3516
3517 /* Shortcut for single character strings */
3518 if (PyString_GET_SIZE(self) == 1)
3519 return PyBool_FromLong(isupper(*p) != 0);
3520
3521 /* Special case for empty strings */
3522 if (PyString_GET_SIZE(self) == 0)
3523 return PyBool_FromLong(0);
3524
3525 e = p + PyString_GET_SIZE(self);
3526 cased = 0;
3527 previous_is_cased = 0;
3528 for (; p < e; p++) {
3529 register const unsigned char ch = *p;
3530
3531 if (isupper(ch)) {
3532 if (previous_is_cased)
3533 return PyBool_FromLong(0);
3534 previous_is_cased = 1;
3535 cased = 1;
3536 }
3537 else if (islower(ch)) {
3538 if (!previous_is_cased)
3539 return PyBool_FromLong(0);
3540 previous_is_cased = 1;
3541 cased = 1;
3542 }
3543 else
3544 previous_is_cased = 0;
3545 }
3546 return PyBool_FromLong(cased);
3547 }
3548
3549
3550 PyDoc_STRVAR(splitlines__doc__,
3551 "S.splitlines(keepends=False) -> list of strings\n\
3552 \n\
3553 Return a list of the lines in S, breaking at line boundaries.\n\
3554 Line breaks are not included in the resulting list unless keepends\n\
3555 is given and true.");
3556
3557 static PyObject*
string_splitlines(PyStringObject * self,PyObject * args)3558 string_splitlines(PyStringObject *self, PyObject *args)
3559 {
3560 int keepends = 0;
3561
3562 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3563 return NULL;
3564
3565 return stringlib_splitlines(
3566 (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3567 keepends
3568 );
3569 }
3570
3571 PyDoc_STRVAR(sizeof__doc__,
3572 "S.__sizeof__() -> size of S in memory, in bytes");
3573
3574 static PyObject *
string_sizeof(PyStringObject * v)3575 string_sizeof(PyStringObject *v)
3576 {
3577 Py_ssize_t res;
3578 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3579 return PyInt_FromSsize_t(res);
3580 }
3581
3582 static PyObject *
string_getnewargs(PyStringObject * v)3583 string_getnewargs(PyStringObject *v)
3584 {
3585 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
3586 }
3587
3588
3589 #include "stringlib/string_format.h"
3590
3591 PyDoc_STRVAR(format__doc__,
3592 "S.format(*args, **kwargs) -> string\n\
3593 \n\
3594 Return a formatted version of S, using substitutions from args and kwargs.\n\
3595 The substitutions are identified by braces ('{' and '}').");
3596
3597 static PyObject *
string__format__(PyObject * self,PyObject * args)3598 string__format__(PyObject* self, PyObject* args)
3599 {
3600 PyObject *format_spec;
3601 PyObject *result = NULL;
3602 PyObject *tmp = NULL;
3603
3604 /* If 2.x, convert format_spec to the same type as value */
3605 /* This is to allow things like u''.format('') */
3606 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3607 goto done;
3608 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
3609 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3610 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3611 goto done;
3612 }
3613 tmp = PyObject_Str(format_spec);
3614 if (tmp == NULL)
3615 goto done;
3616 format_spec = tmp;
3617
3618 result = _PyBytes_FormatAdvanced(self,
3619 PyString_AS_STRING(format_spec),
3620 PyString_GET_SIZE(format_spec));
3621 done:
3622 Py_XDECREF(tmp);
3623 return result;
3624 }
3625
3626 PyDoc_STRVAR(p_format__doc__,
3627 "S.__format__(format_spec) -> string\n\
3628 \n\
3629 Return a formatted version of S as described by format_spec.");
3630
3631
3632 static PyMethodDef
3633 string_methods[] = {
3634 /* Counterparts of the obsolete stropmodule functions; except
3635 string.maketrans(). */
3636 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3637 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3638 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3639 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3640 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3641 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3642 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3643 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3644 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3645 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3646 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3647 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3648 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3649 capitalize__doc__},
3650 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3651 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3652 endswith__doc__},
3653 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3654 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3655 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3656 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3657 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3658 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3659 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3660 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3661 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3662 rpartition__doc__},
3663 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3664 startswith__doc__},
3665 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3666 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3667 swapcase__doc__},
3668 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3669 translate__doc__},
3670 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3671 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3672 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3673 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3674 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3675 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3676 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3677 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3678 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3679 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
3680 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
3681 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3682 expandtabs__doc__},
3683 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3684 splitlines__doc__},
3685 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3686 sizeof__doc__},
3687 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3688 {NULL, NULL} /* sentinel */
3689 };
3690
3691 static PyObject *
3692 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3693
3694 static PyObject *
string_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3695 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3696 {
3697 PyObject *x = NULL;
3698 static char *kwlist[] = {"object", 0};
3699
3700 if (type != &PyString_Type)
3701 return str_subtype_new(type, args, kwds);
3702 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3703 return NULL;
3704 if (x == NULL)
3705 return PyString_FromString("");
3706 return PyObject_Str(x);
3707 }
3708
3709 static PyObject *
str_subtype_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3710 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3711 {
3712 PyObject *tmp, *pnew;
3713 Py_ssize_t n;
3714
3715 assert(PyType_IsSubtype(type, &PyString_Type));
3716 tmp = string_new(&PyString_Type, args, kwds);
3717 if (tmp == NULL)
3718 return NULL;
3719 assert(PyString_Check(tmp));
3720 n = PyString_GET_SIZE(tmp);
3721 pnew = type->tp_alloc(type, n);
3722 if (pnew != NULL) {
3723 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3724 ((PyStringObject *)pnew)->ob_shash =
3725 ((PyStringObject *)tmp)->ob_shash;
3726 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3727 }
3728 Py_DECREF(tmp);
3729 return pnew;
3730 }
3731
3732 static PyObject *
basestring_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3733 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3734 {
3735 PyErr_SetString(PyExc_TypeError,
3736 "The basestring type cannot be instantiated");
3737 return NULL;
3738 }
3739
3740 static PyObject *
string_mod(PyObject * v,PyObject * w)3741 string_mod(PyObject *v, PyObject *w)
3742 {
3743 if (!PyString_Check(v)) {
3744 Py_INCREF(Py_NotImplemented);
3745 return Py_NotImplemented;
3746 }
3747 return PyString_Format(v, w);
3748 }
3749
3750 PyDoc_STRVAR(basestring_doc,
3751 "Type basestring cannot be instantiated; it is the base for str and unicode.");
3752
3753 static PyNumberMethods string_as_number = {
3754 0, /*nb_add*/
3755 0, /*nb_subtract*/
3756 0, /*nb_multiply*/
3757 0, /*nb_divide*/
3758 string_mod, /*nb_remainder*/
3759 };
3760
3761
3762 PyTypeObject PyBaseString_Type = {
3763 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3764 "basestring",
3765 0,
3766 0,
3767 0, /* tp_dealloc */
3768 0, /* tp_print */
3769 0, /* tp_getattr */
3770 0, /* tp_setattr */
3771 0, /* tp_compare */
3772 0, /* tp_repr */
3773 0, /* tp_as_number */
3774 0, /* tp_as_sequence */
3775 0, /* tp_as_mapping */
3776 0, /* tp_hash */
3777 0, /* tp_call */
3778 0, /* tp_str */
3779 0, /* tp_getattro */
3780 0, /* tp_setattro */
3781 0, /* tp_as_buffer */
3782 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3783 basestring_doc, /* tp_doc */
3784 0, /* tp_traverse */
3785 0, /* tp_clear */
3786 0, /* tp_richcompare */
3787 0, /* tp_weaklistoffset */
3788 0, /* tp_iter */
3789 0, /* tp_iternext */
3790 0, /* tp_methods */
3791 0, /* tp_members */
3792 0, /* tp_getset */
3793 &PyBaseObject_Type, /* tp_base */
3794 0, /* tp_dict */
3795 0, /* tp_descr_get */
3796 0, /* tp_descr_set */
3797 0, /* tp_dictoffset */
3798 0, /* tp_init */
3799 0, /* tp_alloc */
3800 basestring_new, /* tp_new */
3801 0, /* tp_free */
3802 };
3803
3804 PyDoc_STRVAR(string_doc,
3805 "str(object='') -> string\n\
3806 \n\
3807 Return a nice string representation of the object.\n\
3808 If the argument is a string, the return value is the same object.");
3809
3810 PyTypeObject PyString_Type = {
3811 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3812 "str",
3813 PyStringObject_SIZE,
3814 sizeof(char),
3815 string_dealloc, /* tp_dealloc */
3816 (printfunc)string_print, /* tp_print */
3817 0, /* tp_getattr */
3818 0, /* tp_setattr */
3819 0, /* tp_compare */
3820 string_repr, /* tp_repr */
3821 &string_as_number, /* tp_as_number */
3822 &string_as_sequence, /* tp_as_sequence */
3823 &string_as_mapping, /* tp_as_mapping */
3824 (hashfunc)string_hash, /* tp_hash */
3825 0, /* tp_call */
3826 string_str, /* tp_str */
3827 PyObject_GenericGetAttr, /* tp_getattro */
3828 0, /* tp_setattro */
3829 &string_as_buffer, /* tp_as_buffer */
3830 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3831 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
3832 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3833 string_doc, /* tp_doc */
3834 0, /* tp_traverse */
3835 0, /* tp_clear */
3836 (richcmpfunc)string_richcompare, /* tp_richcompare */
3837 0, /* tp_weaklistoffset */
3838 0, /* tp_iter */
3839 0, /* tp_iternext */
3840 string_methods, /* tp_methods */
3841 0, /* tp_members */
3842 0, /* tp_getset */
3843 &PyBaseString_Type, /* tp_base */
3844 0, /* tp_dict */
3845 0, /* tp_descr_get */
3846 0, /* tp_descr_set */
3847 0, /* tp_dictoffset */
3848 0, /* tp_init */
3849 0, /* tp_alloc */
3850 string_new, /* tp_new */
3851 PyObject_Del, /* tp_free */
3852 };
3853
3854 void
PyString_Concat(register PyObject ** pv,register PyObject * w)3855 PyString_Concat(register PyObject **pv, register PyObject *w)
3856 {
3857 register PyObject *v;
3858 if (*pv == NULL)
3859 return;
3860 if (w == NULL || !PyString_Check(*pv)) {
3861 Py_CLEAR(*pv);
3862 return;
3863 }
3864 v = string_concat((PyStringObject *) *pv, w);
3865 Py_SETREF(*pv, v);
3866 }
3867
3868 void
PyString_ConcatAndDel(register PyObject ** pv,register PyObject * w)3869 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
3870 {
3871 PyString_Concat(pv, w);
3872 Py_XDECREF(w);
3873 }
3874
3875
3876 /* The following function breaks the notion that strings are immutable:
3877 it changes the size of a string. We get away with this only if there
3878 is only one module referencing the object. You can also think of it
3879 as creating a new string object and destroying the old one, only
3880 more efficiently. In any case, don't use this if the string may
3881 already be known to some other part of the code...
3882 Note that if there's not enough memory to resize the string, the original
3883 string object at *pv is deallocated, *pv is set to NULL, an "out of
3884 memory" exception is set, and -1 is returned. Else (on success) 0 is
3885 returned, and the value in *pv may or may not be the same as on input.
3886 As always, an extra byte is allocated for a trailing \0 byte (newsize
3887 does *not* include that), and a trailing \0 byte is stored.
3888 */
3889
3890 int
_PyString_Resize(PyObject ** pv,Py_ssize_t newsize)3891 _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
3892 {
3893 register PyObject *v;
3894 register PyStringObject *sv;
3895 v = *pv;
3896 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
3897 PyString_CHECK_INTERNED(v)) {
3898 *pv = 0;
3899 Py_DECREF(v);
3900 PyErr_BadInternalCall();
3901 return -1;
3902 }
3903 /* XXX UNREF/NEWREF interface should be more symmetrical */
3904 _Py_DEC_REFTOTAL;
3905 _Py_ForgetReference(v);
3906 *pv = (PyObject *)
3907 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
3908 if (*pv == NULL) {
3909 PyObject_Del(v);
3910 PyErr_NoMemory();
3911 return -1;
3912 }
3913 _Py_NewReference(*pv);
3914 sv = (PyStringObject *) *pv;
3915 Py_SIZE(sv) = newsize;
3916 sv->ob_sval[newsize] = '\0';
3917 sv->ob_shash = -1; /* invalidate cached hash value */
3918 return 0;
3919 }
3920
3921 /* Helpers for formatstring */
3922
3923 Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject * args,Py_ssize_t arglen,Py_ssize_t * p_argidx)3924 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3925 {
3926 Py_ssize_t argidx = *p_argidx;
3927 if (argidx < arglen) {
3928 (*p_argidx)++;
3929 if (arglen < 0)
3930 return args;
3931 else
3932 return PyTuple_GetItem(args, argidx);
3933 }
3934 PyErr_SetString(PyExc_TypeError,
3935 "not enough arguments for format string");
3936 return NULL;
3937 }
3938
3939 /* Format codes
3940 * F_LJUST '-'
3941 * F_SIGN '+'
3942 * F_BLANK ' '
3943 * F_ALT '#'
3944 * F_ZERO '0'
3945 */
3946 #define F_LJUST (1<<0)
3947 #define F_SIGN (1<<1)
3948 #define F_BLANK (1<<2)
3949 #define F_ALT (1<<3)
3950 #define F_ZERO (1<<4)
3951
3952 /* Returns a new reference to a PyString object, or NULL on failure. */
3953
3954 static PyObject *
formatfloat(PyObject * v,int flags,int prec,int type)3955 formatfloat(PyObject *v, int flags, int prec, int type)
3956 {
3957 char *p;
3958 PyObject *result;
3959 double x;
3960
3961 x = PyFloat_AsDouble(v);
3962 if (x == -1.0 && PyErr_Occurred()) {
3963 PyErr_Format(PyExc_TypeError, "float argument required, "
3964 "not %.200s", Py_TYPE(v)->tp_name);
3965 return NULL;
3966 }
3967
3968 if (prec < 0)
3969 prec = 6;
3970
3971 p = PyOS_double_to_string(x, type, prec,
3972 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
3973
3974 if (p == NULL)
3975 return NULL;
3976 result = PyString_FromStringAndSize(p, strlen(p));
3977 PyMem_Free(p);
3978 return result;
3979 }
3980
3981 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3982 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3983 * Python's regular ints.
3984 * Return value: a new PyString*, or NULL if error.
3985 * . *pbuf is set to point into it,
3986 * *plen set to the # of chars following that.
3987 * Caller must decref it when done using pbuf.
3988 * The string starting at *pbuf is of the form
3989 * "-"? ("0x" | "0X")? digit+
3990 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3991 * set in flags. The case of hex digits will be correct,
3992 * There will be at least prec digits, zero-filled on the left if
3993 * necessary to get that many.
3994 * val object to be converted
3995 * flags bitmask of format flags; only F_ALT is looked at
3996 * prec minimum number of digits; 0-fill on left if needed
3997 * type a character in [duoxX]; u acts the same as d
3998 *
3999 * CAUTION: o, x and X conversions on regular ints can never
4000 * produce a '-' sign, but can for Python's unbounded ints.
4001 */
4002 PyObject*
_PyString_FormatLong(PyObject * val,int flags,int prec,int type,char ** pbuf,int * plen)4003 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4004 char **pbuf, int *plen)
4005 {
4006 PyObject *result = NULL, *r1;
4007 const char *s;
4008 char *buf;
4009 Py_ssize_t i;
4010 int sign; /* 1 if '-', else 0 */
4011 int len; /* number of characters */
4012 Py_ssize_t llen;
4013 int numdigits; /* len == numnondigits + skipped + numdigits */
4014 int numnondigits, skipped, filled;
4015 const char *method;
4016
4017 switch (type) {
4018 case 'd':
4019 case 'u':
4020 method = "str";
4021 result = Py_TYPE(val)->tp_str(val);
4022 break;
4023 case 'o':
4024 method = "oct";
4025 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4026 break;
4027 case 'x':
4028 case 'X':
4029 method = "hex";
4030 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4031 break;
4032 default:
4033 assert(!"'type' not in [duoxX]");
4034 }
4035 if (!result)
4036 return NULL;
4037
4038 if (PyString_AsStringAndSize(result, (char **)&s, &llen) < 0) {
4039 Py_DECREF(result);
4040 return NULL;
4041 }
4042 if (llen > INT_MAX) {
4043 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4044 Py_DECREF(result);
4045 return NULL;
4046 }
4047 len = (int)llen;
4048 if (len > 0 && s[len-1] == 'L') {
4049 --len;
4050 if (len == 0)
4051 goto error;
4052 }
4053 sign = s[0] == '-';
4054 numnondigits = sign;
4055
4056 /* Need to skip 0x, 0X or 0. */
4057 skipped = 0;
4058 switch (type) {
4059 case 'o':
4060 if (s[sign] != '0')
4061 goto error;
4062 /* If 0 is only digit, leave it alone. */
4063 if ((flags & F_ALT) == 0 && len - sign > 1)
4064 skipped = 1;
4065 break;
4066 case 'x':
4067 case 'X':
4068 if (s[sign] != '0' || (s[sign + 1] != 'x' && s[sign + 1] != 'X'))
4069 goto error;
4070 if ((flags & F_ALT) == 0)
4071 skipped = 2;
4072 else
4073 numnondigits += 2;
4074 break;
4075 }
4076 numdigits = len - numnondigits - skipped;
4077 if (numdigits <= 0)
4078 goto error;
4079
4080 filled = prec - numdigits;
4081 if (filled < 0)
4082 filled = 0;
4083 len = numnondigits + filled + numdigits;
4084
4085 /* To modify the string in-place, there can only be one reference. */
4086 if (skipped >= filled &&
4087 PyString_CheckExact(result) &&
4088 Py_REFCNT(result) == 1 &&
4089 !PyString_CHECK_INTERNED(result))
4090 {
4091 r1 = NULL;
4092 buf = (char *)s + skipped - filled;
4093 }
4094 else {
4095 r1 = result;
4096 result = PyString_FromStringAndSize(NULL, len);
4097 if (!result) {
4098 Py_DECREF(r1);
4099 return NULL;
4100 }
4101 buf = PyString_AS_STRING(result);
4102 }
4103
4104 for (i = numnondigits; --i >= 0;)
4105 buf[i] = s[i];
4106 buf += numnondigits;
4107 s += numnondigits + skipped;
4108 for (i = 0; i < filled; i++)
4109 *buf++ = '0';
4110 if (r1 == NULL) {
4111 assert(buf == s);
4112 buf += numdigits;
4113 }
4114 else {
4115 for (i = 0; i < numdigits; i++)
4116 *buf++ = *s++;
4117 }
4118 *buf = '\0';
4119 buf -= len;
4120 Py_XDECREF(r1);
4121
4122 /* Fix up case for hex conversions. */
4123 if (type == 'X') {
4124 /* Need to convert all lower case letters to upper case.
4125 and need to convert 0x to 0X (and -0x to -0X). */
4126 for (i = 0; i < len; i++) {
4127 if (buf[i] >= 'a' && buf[i] <= 'z')
4128 buf[i] -= 'a'-'A';
4129 }
4130 }
4131 *pbuf = buf;
4132 *plen = len;
4133 return result;
4134
4135 error:
4136 PyErr_Format(PyExc_ValueError,
4137 "%%%c format: invalid result of __%s__ (type=%.200s)",
4138 type, method, Py_TYPE(val)->tp_name);
4139 Py_DECREF(result);
4140 return NULL;
4141 }
4142
4143 Py_LOCAL_INLINE(int)
formatint(char * buf,size_t buflen,int flags,int prec,int type,PyObject * v)4144 formatint(char *buf, size_t buflen, int flags,
4145 int prec, int type, PyObject *v)
4146 {
4147 /* fmt = '%#.' + `prec` + 'l' + `type`
4148 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4149 + 1 + 1 = 24 */
4150 char fmt[64]; /* plenty big enough! */
4151 char *sign;
4152 long x;
4153
4154 x = PyInt_AsLong(v);
4155 if (x == -1 && PyErr_Occurred()) {
4156 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4157 Py_TYPE(v)->tp_name);
4158 return -1;
4159 }
4160 if (x < 0 && type == 'u') {
4161 type = 'd';
4162 }
4163 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4164 sign = "-";
4165 else
4166 sign = "";
4167 if (prec < 0)
4168 prec = 1;
4169
4170 if ((flags & F_ALT) &&
4171 (type == 'x' || type == 'X')) {
4172 /* When converting under %#x or %#X, there are a number
4173 * of issues that cause pain:
4174 * - when 0 is being converted, the C standard leaves off
4175 * the '0x' or '0X', which is inconsistent with other
4176 * %#x/%#X conversions and inconsistent with Python's
4177 * hex() function
4178 * - there are platforms that violate the standard and
4179 * convert 0 with the '0x' or '0X'
4180 * (Metrowerks, Compaq Tru64)
4181 * - there are platforms that give '0x' when converting
4182 * under %#X, but convert 0 in accordance with the
4183 * standard (OS/2 EMX)
4184 *
4185 * We can achieve the desired consistency by inserting our
4186 * own '0x' or '0X' prefix, and substituting %x/%X in place
4187 * of %#x/%#X.
4188 *
4189 * Note that this is the same approach as used in
4190 * formatint() in unicodeobject.c
4191 */
4192 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4193 sign, type, prec, type);
4194 }
4195 else {
4196 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4197 sign, (flags&F_ALT) ? "#" : "",
4198 prec, type);
4199 }
4200
4201 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4202 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4203 */
4204 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4205 PyErr_SetString(PyExc_OverflowError,
4206 "formatted integer is too long (precision too large?)");
4207 return -1;
4208 }
4209 if (sign[0])
4210 PyOS_snprintf(buf, buflen, fmt, -x);
4211 else
4212 PyOS_snprintf(buf, buflen, fmt, x);
4213 return (int)strlen(buf);
4214 }
4215
4216 Py_LOCAL_INLINE(int)
formatchar(char * buf,size_t buflen,PyObject * v)4217 formatchar(char *buf, size_t buflen, PyObject *v)
4218 {
4219 /* presume that the buffer is at least 2 characters long */
4220 if (PyString_Check(v)) {
4221 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4222 return -1;
4223 }
4224 else {
4225 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4226 return -1;
4227 }
4228 buf[1] = '\0';
4229 return 1;
4230 }
4231
4232 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4233
4234 FORMATBUFLEN is the length of the buffer in which the ints &
4235 chars are formatted. XXX This is a magic number. Each formatting
4236 routine does bounds checking to ensure no overflow, but a better
4237 solution may be to malloc a buffer of appropriate size for each
4238 format. For now, the current solution is sufficient.
4239 */
4240 #define FORMATBUFLEN (size_t)120
4241
4242 PyObject *
PyString_Format(PyObject * format,PyObject * args)4243 PyString_Format(PyObject *format, PyObject *args)
4244 {
4245 char *fmt, *res;
4246 Py_ssize_t arglen, argidx;
4247 Py_ssize_t reslen, rescnt, fmtcnt;
4248 int args_owned = 0;
4249 PyObject *result, *orig_args;
4250 #ifdef Py_USING_UNICODE
4251 PyObject *v, *w;
4252 #endif
4253 PyObject *dict = NULL;
4254 if (format == NULL || !PyString_Check(format) || args == NULL) {
4255 PyErr_BadInternalCall();
4256 return NULL;
4257 }
4258 orig_args = args;
4259 fmt = PyString_AS_STRING(format);
4260 fmtcnt = PyString_GET_SIZE(format);
4261 reslen = rescnt = fmtcnt + 100;
4262 result = PyString_FromStringAndSize((char *)NULL, reslen);
4263 if (result == NULL)
4264 return NULL;
4265 res = PyString_AsString(result);
4266 if (PyTuple_Check(args)) {
4267 arglen = PyTuple_GET_SIZE(args);
4268 argidx = 0;
4269 }
4270 else {
4271 arglen = -1;
4272 argidx = -2;
4273 }
4274 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
4275 !PyTuple_Check(args) && !PyObject_TypeCheck(args, &PyBaseString_Type))
4276 dict = args;
4277 while (--fmtcnt >= 0) {
4278 if (*fmt != '%') {
4279 if (--rescnt < 0) {
4280 rescnt = fmtcnt + 100;
4281 reslen += rescnt;
4282 if (_PyString_Resize(&result, reslen))
4283 return NULL;
4284 res = PyString_AS_STRING(result)
4285 + reslen - rescnt;
4286 --rescnt;
4287 }
4288 *res++ = *fmt++;
4289 }
4290 else {
4291 /* Got a format specifier */
4292 int flags = 0;
4293 Py_ssize_t width = -1;
4294 int prec = -1;
4295 int c = '\0';
4296 int fill;
4297 int isnumok;
4298 PyObject *v = NULL;
4299 PyObject *temp = NULL;
4300 char *pbuf;
4301 int sign;
4302 Py_ssize_t len;
4303 char formatbuf[FORMATBUFLEN];
4304 /* For format{int,char}() */
4305 #ifdef Py_USING_UNICODE
4306 char *fmt_start = fmt;
4307 Py_ssize_t argidx_start = argidx;
4308 #endif
4309
4310 fmt++;
4311 if (*fmt == '(') {
4312 char *keystart;
4313 Py_ssize_t keylen;
4314 PyObject *key;
4315 int pcount = 1;
4316
4317 if (dict == NULL) {
4318 PyErr_SetString(PyExc_TypeError,
4319 "format requires a mapping");
4320 goto error;
4321 }
4322 ++fmt;
4323 --fmtcnt;
4324 keystart = fmt;
4325 /* Skip over balanced parentheses */
4326 while (pcount > 0 && --fmtcnt >= 0) {
4327 if (*fmt == ')')
4328 --pcount;
4329 else if (*fmt == '(')
4330 ++pcount;
4331 fmt++;
4332 }
4333 keylen = fmt - keystart - 1;
4334 if (fmtcnt < 0 || pcount > 0) {
4335 PyErr_SetString(PyExc_ValueError,
4336 "incomplete format key");
4337 goto error;
4338 }
4339 key = PyString_FromStringAndSize(keystart,
4340 keylen);
4341 if (key == NULL)
4342 goto error;
4343 if (args_owned) {
4344 Py_DECREF(args);
4345 args_owned = 0;
4346 }
4347 args = PyObject_GetItem(dict, key);
4348 Py_DECREF(key);
4349 if (args == NULL) {
4350 goto error;
4351 }
4352 args_owned = 1;
4353 arglen = -1;
4354 argidx = -2;
4355 }
4356 while (--fmtcnt >= 0) {
4357 switch (c = *fmt++) {
4358 case '-': flags |= F_LJUST; continue;
4359 case '+': flags |= F_SIGN; continue;
4360 case ' ': flags |= F_BLANK; continue;
4361 case '#': flags |= F_ALT; continue;
4362 case '0': flags |= F_ZERO; continue;
4363 }
4364 break;
4365 }
4366 if (c == '*') {
4367 v = getnextarg(args, arglen, &argidx);
4368 if (v == NULL)
4369 goto error;
4370 if (!PyInt_Check(v)) {
4371 PyErr_SetString(PyExc_TypeError,
4372 "* wants int");
4373 goto error;
4374 }
4375 width = PyInt_AsSsize_t(v);
4376 if (width == -1 && PyErr_Occurred())
4377 goto error;
4378 if (width < 0) {
4379 flags |= F_LJUST;
4380 width = -width;
4381 }
4382 if (--fmtcnt >= 0)
4383 c = *fmt++;
4384 }
4385 else if (c >= 0 && isdigit(c)) {
4386 width = c - '0';
4387 while (--fmtcnt >= 0) {
4388 c = Py_CHARMASK(*fmt++);
4389 if (!isdigit(c))
4390 break;
4391 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
4392 PyErr_SetString(
4393 PyExc_ValueError,
4394 "width too big");
4395 goto error;
4396 }
4397 width = width*10 + (c - '0');
4398 }
4399 }
4400 if (c == '.') {
4401 prec = 0;
4402 if (--fmtcnt >= 0)
4403 c = *fmt++;
4404 if (c == '*') {
4405 v = getnextarg(args, arglen, &argidx);
4406 if (v == NULL)
4407 goto error;
4408 if (!PyInt_Check(v)) {
4409 PyErr_SetString(
4410 PyExc_TypeError,
4411 "* wants int");
4412 goto error;
4413 }
4414 prec = _PyInt_AsInt(v);
4415 if (prec == -1 && PyErr_Occurred())
4416 goto error;
4417 if (prec < 0)
4418 prec = 0;
4419 if (--fmtcnt >= 0)
4420 c = *fmt++;
4421 }
4422 else if (c >= 0 && isdigit(c)) {
4423 prec = c - '0';
4424 while (--fmtcnt >= 0) {
4425 c = Py_CHARMASK(*fmt++);
4426 if (!isdigit(c))
4427 break;
4428 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
4429 PyErr_SetString(
4430 PyExc_ValueError,
4431 "prec too big");
4432 goto error;
4433 }
4434 prec = prec*10 + (c - '0');
4435 }
4436 }
4437 } /* prec */
4438 if (fmtcnt >= 0) {
4439 if (c == 'h' || c == 'l' || c == 'L') {
4440 if (--fmtcnt >= 0)
4441 c = *fmt++;
4442 }
4443 }
4444 if (fmtcnt < 0) {
4445 PyErr_SetString(PyExc_ValueError,
4446 "incomplete format");
4447 goto error;
4448 }
4449 if (c != '%') {
4450 v = getnextarg(args, arglen, &argidx);
4451 if (v == NULL)
4452 goto error;
4453 }
4454 sign = 0;
4455 fill = ' ';
4456 switch (c) {
4457 case '%':
4458 pbuf = "%";
4459 len = 1;
4460 break;
4461 case 's':
4462 #ifdef Py_USING_UNICODE
4463 if (PyUnicode_Check(v)) {
4464 fmt = fmt_start;
4465 argidx = argidx_start;
4466 goto unicode;
4467 }
4468 #endif
4469 temp = _PyObject_Str(v);
4470 #ifdef Py_USING_UNICODE
4471 if (temp != NULL && PyUnicode_Check(temp)) {
4472 Py_DECREF(temp);
4473 fmt = fmt_start;
4474 argidx = argidx_start;
4475 goto unicode;
4476 }
4477 #endif
4478 /* Fall through */
4479 case 'r':
4480 if (c == 'r')
4481 temp = PyObject_Repr(v);
4482 if (temp == NULL)
4483 goto error;
4484 if (!PyString_Check(temp)) {
4485 PyErr_SetString(PyExc_TypeError,
4486 "%s argument has non-string str()");
4487 Py_DECREF(temp);
4488 goto error;
4489 }
4490 pbuf = PyString_AS_STRING(temp);
4491 len = PyString_GET_SIZE(temp);
4492 if (prec >= 0 && len > prec)
4493 len = prec;
4494 break;
4495 case 'i':
4496 case 'd':
4497 case 'u':
4498 case 'o':
4499 case 'x':
4500 case 'X':
4501 if (c == 'i')
4502 c = 'd';
4503 isnumok = 0;
4504 if (PyNumber_Check(v)) {
4505 PyObject *iobj=NULL;
4506
4507 if (_PyAnyInt_Check(v)) {
4508 iobj = v;
4509 Py_INCREF(iobj);
4510 }
4511 else {
4512 iobj = PyNumber_Int(v);
4513 if (iobj==NULL) {
4514 PyErr_Clear();
4515 iobj = PyNumber_Long(v);
4516 }
4517 }
4518 if (iobj!=NULL) {
4519 if (PyInt_Check(iobj)) {
4520 isnumok = 1;
4521 pbuf = formatbuf;
4522 len = formatint(pbuf,
4523 sizeof(formatbuf),
4524 flags, prec, c, iobj);
4525 Py_DECREF(iobj);
4526 if (len < 0)
4527 goto error;
4528 sign = 1;
4529 }
4530 else if (PyLong_Check(iobj)) {
4531 int ilen;
4532
4533 isnumok = 1;
4534 temp = _PyString_FormatLong(iobj, flags,
4535 prec, c, &pbuf, &ilen);
4536 Py_DECREF(iobj);
4537 len = ilen;
4538 if (!temp)
4539 goto error;
4540 sign = 1;
4541 }
4542 else {
4543 Py_DECREF(iobj);
4544 }
4545 }
4546 }
4547 if (!isnumok) {
4548 PyErr_Format(PyExc_TypeError,
4549 "%%%c format: a number is required, "
4550 "not %.200s", c, Py_TYPE(v)->tp_name);
4551 goto error;
4552 }
4553 if (flags & F_ZERO)
4554 fill = '0';
4555 break;
4556 case 'e':
4557 case 'E':
4558 case 'f':
4559 case 'F':
4560 case 'g':
4561 case 'G':
4562 temp = formatfloat(v, flags, prec, c);
4563 if (temp == NULL)
4564 goto error;
4565 pbuf = PyString_AS_STRING(temp);
4566 len = PyString_GET_SIZE(temp);
4567 sign = 1;
4568 if (flags & F_ZERO)
4569 fill = '0';
4570 break;
4571 case 'c':
4572 #ifdef Py_USING_UNICODE
4573 if (PyUnicode_Check(v)) {
4574 fmt = fmt_start;
4575 argidx = argidx_start;
4576 goto unicode;
4577 }
4578 #endif
4579 pbuf = formatbuf;
4580 len = formatchar(pbuf, sizeof(formatbuf), v);
4581 if (len < 0)
4582 goto error;
4583 break;
4584 default:
4585 PyErr_Format(PyExc_ValueError,
4586 "unsupported format character '%c' (0x%x) "
4587 "at index %zd",
4588 c, c,
4589 (Py_ssize_t)(fmt - 1 -
4590 PyString_AsString(format)));
4591 goto error;
4592 }
4593 if (sign) {
4594 if (*pbuf == '-' || *pbuf == '+') {
4595 sign = *pbuf++;
4596 len--;
4597 }
4598 else if (flags & F_SIGN)
4599 sign = '+';
4600 else if (flags & F_BLANK)
4601 sign = ' ';
4602 else
4603 sign = 0;
4604 }
4605 if (width < len)
4606 width = len;
4607 if (rescnt - (sign != 0) < width) {
4608 reslen -= rescnt;
4609 rescnt = width + fmtcnt + 100;
4610 reslen += rescnt;
4611 if (reslen < 0) {
4612 Py_DECREF(result);
4613 Py_XDECREF(temp);
4614 return PyErr_NoMemory();
4615 }
4616 if (_PyString_Resize(&result, reslen)) {
4617 Py_XDECREF(temp);
4618 return NULL;
4619 }
4620 res = PyString_AS_STRING(result)
4621 + reslen - rescnt;
4622 }
4623 if (sign) {
4624 if (fill != ' ')
4625 *res++ = sign;
4626 rescnt--;
4627 if (width > len)
4628 width--;
4629 }
4630 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4631 assert(pbuf[0] == '0');
4632 assert(pbuf[1] == c);
4633 if (fill != ' ') {
4634 *res++ = *pbuf++;
4635 *res++ = *pbuf++;
4636 }
4637 rescnt -= 2;
4638 width -= 2;
4639 if (width < 0)
4640 width = 0;
4641 len -= 2;
4642 }
4643 if (width > len && !(flags & F_LJUST)) {
4644 do {
4645 --rescnt;
4646 *res++ = fill;
4647 } while (--width > len);
4648 }
4649 if (fill == ' ') {
4650 if (sign)
4651 *res++ = sign;
4652 if ((flags & F_ALT) &&
4653 (c == 'x' || c == 'X')) {
4654 assert(pbuf[0] == '0');
4655 assert(pbuf[1] == c);
4656 *res++ = *pbuf++;
4657 *res++ = *pbuf++;
4658 }
4659 }
4660 Py_MEMCPY(res, pbuf, len);
4661 res += len;
4662 rescnt -= len;
4663 while (--width >= len) {
4664 --rescnt;
4665 *res++ = ' ';
4666 }
4667 if (dict && (argidx < arglen) && c != '%') {
4668 PyErr_SetString(PyExc_TypeError,
4669 "not all arguments converted during string formatting");
4670 Py_XDECREF(temp);
4671 goto error;
4672 }
4673 Py_XDECREF(temp);
4674 } /* '%' */
4675 } /* until end */
4676 if (argidx < arglen && !dict) {
4677 PyErr_SetString(PyExc_TypeError,
4678 "not all arguments converted during string formatting");
4679 goto error;
4680 }
4681 if (args_owned) {
4682 Py_DECREF(args);
4683 }
4684 if (_PyString_Resize(&result, reslen - rescnt))
4685 return NULL;
4686 return result;
4687
4688 #ifdef Py_USING_UNICODE
4689 unicode:
4690 if (args_owned) {
4691 Py_DECREF(args);
4692 args_owned = 0;
4693 }
4694 /* Fiddle args right (remove the first argidx arguments) */
4695 if (PyTuple_Check(orig_args) && argidx > 0) {
4696 PyObject *v;
4697 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4698 v = PyTuple_New(n);
4699 if (v == NULL)
4700 goto error;
4701 while (--n >= 0) {
4702 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4703 Py_INCREF(w);
4704 PyTuple_SET_ITEM(v, n, w);
4705 }
4706 args = v;
4707 } else {
4708 Py_INCREF(orig_args);
4709 args = orig_args;
4710 }
4711 args_owned = 1;
4712 /* Take what we have of the result and let the Unicode formatting
4713 function format the rest of the input. */
4714 rescnt = res - PyString_AS_STRING(result);
4715 if (_PyString_Resize(&result, rescnt))
4716 goto error;
4717 fmtcnt = PyString_GET_SIZE(format) - \
4718 (fmt - PyString_AS_STRING(format));
4719 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4720 if (format == NULL)
4721 goto error;
4722 v = PyUnicode_Format(format, args);
4723 Py_DECREF(format);
4724 if (v == NULL)
4725 goto error;
4726 /* Paste what we have (result) to what the Unicode formatting
4727 function returned (v) and return the result (or error) */
4728 w = PyUnicode_Concat(result, v);
4729 Py_DECREF(result);
4730 Py_DECREF(v);
4731 Py_DECREF(args);
4732 return w;
4733 #endif /* Py_USING_UNICODE */
4734
4735 error:
4736 Py_DECREF(result);
4737 if (args_owned) {
4738 Py_DECREF(args);
4739 }
4740 return NULL;
4741 }
4742
4743 void
PyString_InternInPlace(PyObject ** p)4744 PyString_InternInPlace(PyObject **p)
4745 {
4746 register PyStringObject *s = (PyStringObject *)(*p);
4747 PyObject *t;
4748 if (s == NULL || !PyString_Check(s))
4749 Py_FatalError("PyString_InternInPlace: strings only please!");
4750 /* If it's a string subclass, we don't really know what putting
4751 it in the interned dict might do. */
4752 if (!PyString_CheckExact(s))
4753 return;
4754 if (PyString_CHECK_INTERNED(s))
4755 return;
4756 if (interned == NULL) {
4757 interned = PyDict_New();
4758 if (interned == NULL) {
4759 PyErr_Clear(); /* Don't leave an exception */
4760 return;
4761 }
4762 }
4763 t = PyDict_GetItem(interned, (PyObject *)s);
4764 if (t) {
4765 Py_INCREF(t);
4766 Py_SETREF(*p, t);
4767 return;
4768 }
4769
4770 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4771 PyErr_Clear();
4772 return;
4773 }
4774 /* The two references in interned are not counted by refcnt.
4775 The string deallocator will take care of this */
4776 Py_REFCNT(s) -= 2;
4777 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
4778 }
4779
4780 void
PyString_InternImmortal(PyObject ** p)4781 PyString_InternImmortal(PyObject **p)
4782 {
4783 PyString_InternInPlace(p);
4784 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4785 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4786 Py_INCREF(*p);
4787 }
4788 }
4789
4790
4791 PyObject *
PyString_InternFromString(const char * cp)4792 PyString_InternFromString(const char *cp)
4793 {
4794 PyObject *s = PyString_FromString(cp);
4795 if (s == NULL)
4796 return NULL;
4797 PyString_InternInPlace(&s);
4798 return s;
4799 }
4800
4801 void
PyString_Fini(void)4802 PyString_Fini(void)
4803 {
4804 int i;
4805 for (i = 0; i < UCHAR_MAX + 1; i++)
4806 Py_CLEAR(characters[i]);
4807 Py_CLEAR(nullstring);
4808 }
4809
_Py_ReleaseInternedStrings(void)4810 void _Py_ReleaseInternedStrings(void)
4811 {
4812 PyObject *keys;
4813 PyStringObject *s;
4814 Py_ssize_t i, n;
4815 Py_ssize_t immortal_size = 0, mortal_size = 0;
4816
4817 if (interned == NULL || !PyDict_Check(interned))
4818 return;
4819 keys = PyDict_Keys(interned);
4820 if (keys == NULL || !PyList_Check(keys)) {
4821 PyErr_Clear();
4822 return;
4823 }
4824
4825 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4826 detector, interned strings are not forcibly deallocated; rather, we
4827 give them their stolen references back, and then clear and DECREF
4828 the interned dict. */
4829
4830 n = PyList_GET_SIZE(keys);
4831 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4832 n);
4833 for (i = 0; i < n; i++) {
4834 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4835 switch (s->ob_sstate) {
4836 case SSTATE_NOT_INTERNED:
4837 /* XXX Shouldn't happen */
4838 break;
4839 case SSTATE_INTERNED_IMMORTAL:
4840 Py_REFCNT(s) += 1;
4841 immortal_size += Py_SIZE(s);
4842 break;
4843 case SSTATE_INTERNED_MORTAL:
4844 Py_REFCNT(s) += 2;
4845 mortal_size += Py_SIZE(s);
4846 break;
4847 default:
4848 Py_FatalError("Inconsistent interned string state.");
4849 }
4850 s->ob_sstate = SSTATE_NOT_INTERNED;
4851 }
4852 fprintf(stderr, "total size of all interned strings: "
4853 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4854 "mortal/immortal\n", mortal_size, immortal_size);
4855 Py_DECREF(keys);
4856 PyDict_Clear(interned);
4857 Py_CLEAR(interned);
4858 }
4859