1 /* String (str/bytes) object implementation */
2
3 #define PY_SSIZE_T_CLEAN
4
5 #include "Python.h"
6 #include <ctype.h>
7 #include <stddef.h>
8
#ifdef COUNT_ALLOCS
/* Allocation statistics: bumped whenever the shared empty string
   (null_strings) or a shared one-character string (one_strings) is
   handed out instead of allocating a new object. */
Py_ssize_t null_strings, one_strings;
#endif

/* Cache of shared one-character strings, indexed by byte value.  A slot
   stays NULL until the corresponding string has been created. */
static PyStringObject *characters[UCHAR_MAX + 1];
/* The shared empty string, or NULL if it has not been created yet. */
static PyStringObject *nullstring;

/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
static PyObject *interned;

/* PyStringObject_SIZE gives the basic size of a string; any memory allocation
   for a string of length n should request PyStringObject_SIZE + n bytes.

   Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
   3 bytes per string allocation on a typical system.
*/
#define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33
34 /*
35 For PyString_FromString(), the parameter `str' points to a null-terminated
36 string containing exactly `size' bytes.
37
38 For PyString_FromStringAndSize(), the parameter `str' is
39 either NULL or else points to a string containing at least `size' bytes.
40 For PyString_FromStringAndSize(), the string in the `str' parameter does
41 not have to be null-terminated. (Therefore it is safe to construct a
42 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
43 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
44 bytes (setting the last byte to the null terminating character) and you can
45 fill in the data yourself. If `str' is non-NULL then the resulting
46 PyString object must be treated as immutable and you must not fill in nor
47 alter the data yourself, since the strings may be shared.
48
49 The PyObject member `op->ob_size', which denotes the number of "extra
50 items" in a variable-size object, will contain the number of bytes
51 allocated for string data, not counting the null terminating character.
52 It is therefore equal to the `size' parameter (for
53 PyString_FromStringAndSize()) or the length of the string in the `str'
54 parameter (for PyString_FromString()).
55 */
56 PyObject *
PyString_FromStringAndSize(const char * str,Py_ssize_t size)57 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
58 {
59 register PyStringObject *op;
60 if (size < 0) {
61 PyErr_SetString(PyExc_SystemError,
62 "Negative size passed to PyString_FromStringAndSize");
63 return NULL;
64 }
65 if (size == 0 && (op = nullstring) != NULL) {
66 #ifdef COUNT_ALLOCS
67 null_strings++;
68 #endif
69 Py_INCREF(op);
70 return (PyObject *)op;
71 }
72 if (size == 1 && str != NULL &&
73 (op = characters[*str & UCHAR_MAX]) != NULL)
74 {
75 #ifdef COUNT_ALLOCS
76 one_strings++;
77 #endif
78 Py_INCREF(op);
79 return (PyObject *)op;
80 }
81
82 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
83 PyErr_SetString(PyExc_OverflowError, "string is too large");
84 return NULL;
85 }
86
87 /* Inline PyObject_NewVar */
88 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
89 if (op == NULL)
90 return PyErr_NoMemory();
91 (void)PyObject_INIT_VAR(op, &PyString_Type, size);
92 op->ob_shash = -1;
93 op->ob_sstate = SSTATE_NOT_INTERNED;
94 if (str != NULL)
95 Py_MEMCPY(op->ob_sval, str, size);
96 op->ob_sval[size] = '\0';
97 /* share short strings */
98 if (size == 0) {
99 PyObject *t = (PyObject *)op;
100 PyString_InternInPlace(&t);
101 op = (PyStringObject *)t;
102 nullstring = op;
103 Py_INCREF(op);
104 } else if (size == 1 && str != NULL) {
105 PyObject *t = (PyObject *)op;
106 PyString_InternInPlace(&t);
107 op = (PyStringObject *)t;
108 characters[*str & UCHAR_MAX] = op;
109 Py_INCREF(op);
110 }
111 return (PyObject *) op;
112 }
113
114 PyObject *
PyString_FromString(const char * str)115 PyString_FromString(const char *str)
116 {
117 register size_t size;
118 register PyStringObject *op;
119
120 assert(str != NULL);
121 size = strlen(str);
122 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
123 PyErr_SetString(PyExc_OverflowError,
124 "string is too long for a Python string");
125 return NULL;
126 }
127 if (size == 0 && (op = nullstring) != NULL) {
128 #ifdef COUNT_ALLOCS
129 null_strings++;
130 #endif
131 Py_INCREF(op);
132 return (PyObject *)op;
133 }
134 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
135 #ifdef COUNT_ALLOCS
136 one_strings++;
137 #endif
138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
141
142 /* Inline PyObject_NewVar */
143 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
144 if (op == NULL)
145 return PyErr_NoMemory();
146 (void)PyObject_INIT_VAR(op, &PyString_Type, size);
147 op->ob_shash = -1;
148 op->ob_sstate = SSTATE_NOT_INTERNED;
149 Py_MEMCPY(op->ob_sval, str, size+1);
150 /* share short strings */
151 if (size == 0) {
152 PyObject *t = (PyObject *)op;
153 PyString_InternInPlace(&t);
154 op = (PyStringObject *)t;
155 nullstring = op;
156 Py_INCREF(op);
157 } else if (size == 1) {
158 PyObject *t = (PyObject *)op;
159 PyString_InternInPlace(&t);
160 op = (PyStringObject *)t;
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
165 }
166
167 PyObject *
PyString_FromFormatV(const char * format,va_list vargs)168 PyString_FromFormatV(const char *format, va_list vargs)
169 {
170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
175
176 #ifdef VA_LIST_IS_ARRAY
177 Py_MEMCPY(count, vargs, sizeof(va_list));
178 #else
179 #ifdef __va_copy
180 __va_copy(count, vargs);
181 #else
182 count = vargs;
183 #endif
184 #endif
185 /* step 1: figure out how large a buffer we need */
186 for (f = format; *f; f++) {
187 if (*f == '%') {
188 #ifdef HAVE_LONG_LONG
189 int longlongflag = 0;
190 #endif
191 const char* p = f;
192 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
193 ;
194
195 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
196 * they don't affect the amount of space we reserve.
197 */
198 if (*f == 'l') {
199 if (f[1] == 'd' || f[1] == 'u') {
200 ++f;
201 }
202 #ifdef HAVE_LONG_LONG
203 else if (f[1] == 'l' &&
204 (f[2] == 'd' || f[2] == 'u')) {
205 longlongflag = 1;
206 f += 2;
207 }
208 #endif
209 }
210 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
211 ++f;
212 }
213
214 switch (*f) {
215 case 'c':
216 (void)va_arg(count, int);
217 /* fall through... */
218 case '%':
219 n++;
220 break;
221 case 'd': case 'u': case 'i': case 'x':
222 (void) va_arg(count, int);
223 #ifdef HAVE_LONG_LONG
224 /* Need at most
225 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
226 plus 1 for the sign. 53/22 is an upper
227 bound for log10(256). */
228 if (longlongflag)
229 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
230 else
231 #endif
232 /* 20 bytes is enough to hold a 64-bit
233 integer. Decimal takes the most
234 space. This isn't enough for
235 octal. */
236 n += 20;
237
238 break;
239 case 's':
240 s = va_arg(count, char*);
241 n += strlen(s);
242 break;
243 case 'p':
244 (void) va_arg(count, int);
245 /* maximum 64-bit pointer representation:
246 * 0xffffffffffffffff
247 * so 19 characters is enough.
248 * XXX I count 18 -- what's the extra for?
249 */
250 n += 19;
251 break;
252 default:
253 /* if we stumble upon an unknown
254 formatting code, copy the rest of
255 the format string to the output
256 string. (we cannot just skip the
257 code, since there's no way to know
258 what's in the argument list) */
259 n += strlen(p);
260 goto expand;
261 }
262 } else
263 n++;
264 }
265 expand:
266 /* step 2: fill the buffer */
267 /* Since we've analyzed how much space we need for the worst case,
268 use sprintf directly instead of the slower PyOS_snprintf. */
269 string = PyString_FromStringAndSize(NULL, n);
270 if (!string)
271 return NULL;
272
273 s = PyString_AsString(string);
274
275 for (f = format; *f; f++) {
276 if (*f == '%') {
277 const char* p = f++;
278 Py_ssize_t i;
279 int longflag = 0;
280 #ifdef HAVE_LONG_LONG
281 int longlongflag = 0;
282 #endif
283 int size_tflag = 0;
284 /* parse the width.precision part (we're only
285 interested in the precision value, if any) */
286 n = 0;
287 while (isdigit(Py_CHARMASK(*f)))
288 n = (n*10) + *f++ - '0';
289 if (*f == '.') {
290 f++;
291 n = 0;
292 while (isdigit(Py_CHARMASK(*f)))
293 n = (n*10) + *f++ - '0';
294 }
295 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
296 f++;
297 /* Handle %ld, %lu, %lld and %llu. */
298 if (*f == 'l') {
299 if (f[1] == 'd' || f[1] == 'u') {
300 longflag = 1;
301 ++f;
302 }
303 #ifdef HAVE_LONG_LONG
304 else if (f[1] == 'l' &&
305 (f[2] == 'd' || f[2] == 'u')) {
306 longlongflag = 1;
307 f += 2;
308 }
309 #endif
310 }
311 /* handle the size_t flag. */
312 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
313 size_tflag = 1;
314 ++f;
315 }
316
317 switch (*f) {
318 case 'c':
319 *s++ = va_arg(vargs, int);
320 break;
321 case 'd':
322 if (longflag)
323 sprintf(s, "%ld", va_arg(vargs, long));
324 #ifdef HAVE_LONG_LONG
325 else if (longlongflag)
326 sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
327 va_arg(vargs, PY_LONG_LONG));
328 #endif
329 else if (size_tflag)
330 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
331 va_arg(vargs, Py_ssize_t));
332 else
333 sprintf(s, "%d", va_arg(vargs, int));
334 s += strlen(s);
335 break;
336 case 'u':
337 if (longflag)
338 sprintf(s, "%lu",
339 va_arg(vargs, unsigned long));
340 #ifdef HAVE_LONG_LONG
341 else if (longlongflag)
342 sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
343 va_arg(vargs, PY_LONG_LONG));
344 #endif
345 else if (size_tflag)
346 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
347 va_arg(vargs, size_t));
348 else
349 sprintf(s, "%u",
350 va_arg(vargs, unsigned int));
351 s += strlen(s);
352 break;
353 case 'i':
354 sprintf(s, "%i", va_arg(vargs, int));
355 s += strlen(s);
356 break;
357 case 'x':
358 sprintf(s, "%x", va_arg(vargs, int));
359 s += strlen(s);
360 break;
361 case 's':
362 p = va_arg(vargs, char*);
363 i = strlen(p);
364 if (n > 0 && i > n)
365 i = n;
366 Py_MEMCPY(s, p, i);
367 s += i;
368 break;
369 case 'p':
370 sprintf(s, "%p", va_arg(vargs, void*));
371 /* %p is ill-defined: ensure leading 0x. */
372 if (s[1] == 'X')
373 s[1] = 'x';
374 else if (s[1] != 'x') {
375 memmove(s+2, s, strlen(s)+1);
376 s[0] = '0';
377 s[1] = 'x';
378 }
379 s += strlen(s);
380 break;
381 case '%':
382 *s++ = '%';
383 break;
384 default:
385 strcpy(s, p);
386 s += strlen(s);
387 goto end;
388 }
389 } else
390 *s++ = *f;
391 }
392
393 end:
394 if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
395 return NULL;
396 return string;
397 }
398
399 PyObject *
PyString_FromFormat(const char * format,...)400 PyString_FromFormat(const char *format, ...)
401 {
402 PyObject* ret;
403 va_list vargs;
404
405 #ifdef HAVE_STDARG_PROTOTYPES
406 va_start(vargs, format);
407 #else
408 va_start(vargs);
409 #endif
410 ret = PyString_FromFormatV(format, vargs);
411 va_end(vargs);
412 return ret;
413 }
414
415
PyString_Decode(const char * s,Py_ssize_t size,const char * encoding,const char * errors)416 PyObject *PyString_Decode(const char *s,
417 Py_ssize_t size,
418 const char *encoding,
419 const char *errors)
420 {
421 PyObject *v, *str;
422
423 str = PyString_FromStringAndSize(s, size);
424 if (str == NULL)
425 return NULL;
426 v = PyString_AsDecodedString(str, encoding, errors);
427 Py_DECREF(str);
428 return v;
429 }
430
/* Decode the string object `str' through the codec registry and return
   whatever object the codec produces.  A NULL `encoding' selects the
   default encoding when Unicode support is compiled in and is an error
   otherwise.  Returns NULL with an exception set on failure. */
PyObject *PyString_AsDecodedObject(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    if (!PyString_Check(str)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (encoding == NULL) {
#ifdef Py_USING_UNICODE
        encoding = PyUnicode_GetDefaultEncoding();
#else
        PyErr_SetString(PyExc_ValueError, "no encoding specified");
        return NULL;
#endif
    }

    /* Decode via the codec registry; a NULL result propagates as is. */
    return _PyCodec_DecodeText(str, encoding, errors);
}
461
/* Like PyString_AsDecodedObject(), but the result must be a string
   object: a Unicode result is encoded with the default encoding, and
   any other non-string result raises TypeError.  Returns NULL with an
   exception set on failure. */
PyObject *PyString_AsDecodedString(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    PyObject *result = PyString_AsDecodedObject(str, encoding, errors);

    if (result == NULL)
        return NULL;

#ifdef Py_USING_UNICODE
    /* Convert Unicode to a string using the default encoding */
    if (PyUnicode_Check(result)) {
        PyObject *unicode = result;
        result = PyUnicode_AsEncodedString(unicode, NULL, NULL);
        Py_DECREF(unicode);
        if (result == NULL)
            return NULL;
    }
#endif
    if (PyString_Check(result))
        return result;

    PyErr_Format(PyExc_TypeError,
                 "decoder did not return a string object (type=%.400s)",
                 Py_TYPE(result)->tp_name);
    Py_DECREF(result);
    return NULL;
}
495
PyString_Encode(const char * s,Py_ssize_t size,const char * encoding,const char * errors)496 PyObject *PyString_Encode(const char *s,
497 Py_ssize_t size,
498 const char *encoding,
499 const char *errors)
500 {
501 PyObject *v, *str;
502
503 str = PyString_FromStringAndSize(s, size);
504 if (str == NULL)
505 return NULL;
506 v = PyString_AsEncodedString(str, encoding, errors);
507 Py_DECREF(str);
508 return v;
509 }
510
/* Encode the string object `str' through the codec registry and return
   whatever object the codec produces.  A NULL `encoding' selects the
   default encoding when Unicode support is compiled in and is an error
   otherwise.  Returns NULL with an exception set on failure. */
PyObject *PyString_AsEncodedObject(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    if (!PyString_Check(str)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (encoding == NULL) {
#ifdef Py_USING_UNICODE
        encoding = PyUnicode_GetDefaultEncoding();
#else
        PyErr_SetString(PyExc_ValueError, "no encoding specified");
        return NULL;
#endif
    }

    /* Encode via the codec registry; a NULL result propagates as is. */
    return _PyCodec_EncodeText(str, encoding, errors);
}
541
/* Like PyString_AsEncodedObject(), but the result must be a string
   object: a Unicode result is encoded with the default encoding, and
   any other non-string result raises TypeError.  Returns NULL with an
   exception set on failure. */
PyObject *PyString_AsEncodedString(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    PyObject *result = PyString_AsEncodedObject(str, encoding, errors);

    if (result == NULL)
        return NULL;

#ifdef Py_USING_UNICODE
    /* Convert Unicode to a string using the default encoding */
    if (PyUnicode_Check(result)) {
        PyObject *unicode = result;
        result = PyUnicode_AsEncodedString(unicode, NULL, NULL);
        Py_DECREF(unicode);
        if (result == NULL)
            return NULL;
    }
#endif
    if (PyString_Check(result))
        return result;

    PyErr_Format(PyExc_TypeError,
                 "encoder did not return a string object (type=%.400s)",
                 Py_TYPE(result)->tp_name);
    Py_DECREF(result);
    return NULL;
}
575
576 static void
string_dealloc(PyObject * op)577 string_dealloc(PyObject *op)
578 {
579 switch (PyString_CHECK_INTERNED(op)) {
580 case SSTATE_NOT_INTERNED:
581 break;
582
583 case SSTATE_INTERNED_MORTAL:
584 /* revive dead object temporarily for DelItem */
585 Py_REFCNT(op) = 3;
586 if (PyDict_DelItem(interned, op) != 0)
587 Py_FatalError(
588 "deletion of interned string failed");
589 break;
590
591 case SSTATE_INTERNED_IMMORTAL:
592 Py_FatalError("Immortal interned string died.");
593
594 default:
595 Py_FatalError("Inconsistent interned string state.");
596 }
597 Py_TYPE(op)->tp_free(op);
598 }
599
600 /* Unescape a backslash-escaped string. If unicode is non-zero,
601 the string is a u-literal. If recode_encoding is non-zero,
602 the string is UTF-8 encoded and should be re-encoded in the
603 specified encoding. */
604
PyString_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,Py_ssize_t unicode,const char * recode_encoding)605 PyObject *PyString_DecodeEscape(const char *s,
606 Py_ssize_t len,
607 const char *errors,
608 Py_ssize_t unicode,
609 const char *recode_encoding)
610 {
611 int c;
612 char *p, *buf;
613 const char *end;
614 PyObject *v;
615 Py_ssize_t newlen = recode_encoding ? 4*len:len;
616 v = PyString_FromStringAndSize((char *)NULL, newlen);
617 if (v == NULL)
618 return NULL;
619 p = buf = PyString_AsString(v);
620 end = s + len;
621 while (s < end) {
622 if (*s != '\\') {
623 non_esc:
624 #ifdef Py_USING_UNICODE
625 if (recode_encoding && (*s & 0x80)) {
626 PyObject *u, *w;
627 char *r;
628 const char* t;
629 Py_ssize_t rn;
630 t = s;
631 /* Decode non-ASCII bytes as UTF-8. */
632 while (t < end && (*t & 0x80)) t++;
633 u = PyUnicode_DecodeUTF8(s, t - s, errors);
634 if(!u) goto failed;
635
636 /* Recode them in target encoding. */
637 w = PyUnicode_AsEncodedString(
638 u, recode_encoding, errors);
639 Py_DECREF(u);
640 if (!w) goto failed;
641
642 /* Append bytes to output buffer. */
643 assert(PyString_Check(w));
644 r = PyString_AS_STRING(w);
645 rn = PyString_GET_SIZE(w);
646 Py_MEMCPY(p, r, rn);
647 p += rn;
648 Py_DECREF(w);
649 s = t;
650 } else {
651 *p++ = *s++;
652 }
653 #else
654 *p++ = *s++;
655 #endif
656 continue;
657 }
658 s++;
659 if (s==end) {
660 PyErr_SetString(PyExc_ValueError,
661 "Trailing \\ in string");
662 goto failed;
663 }
664 switch (*s++) {
665 /* XXX This assumes ASCII! */
666 case '\n': break;
667 case '\\': *p++ = '\\'; break;
668 case '\'': *p++ = '\''; break;
669 case '\"': *p++ = '\"'; break;
670 case 'b': *p++ = '\b'; break;
671 case 'f': *p++ = '\014'; break; /* FF */
672 case 't': *p++ = '\t'; break;
673 case 'n': *p++ = '\n'; break;
674 case 'r': *p++ = '\r'; break;
675 case 'v': *p++ = '\013'; break; /* VT */
676 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
677 case '0': case '1': case '2': case '3':
678 case '4': case '5': case '6': case '7':
679 c = s[-1] - '0';
680 if (s < end && '0' <= *s && *s <= '7') {
681 c = (c<<3) + *s++ - '0';
682 if (s < end && '0' <= *s && *s <= '7')
683 c = (c<<3) + *s++ - '0';
684 }
685 *p++ = c;
686 break;
687 case 'x':
688 if (s+1 < end &&
689 isxdigit(Py_CHARMASK(s[0])) &&
690 isxdigit(Py_CHARMASK(s[1])))
691 {
692 unsigned int x = 0;
693 c = Py_CHARMASK(*s);
694 s++;
695 if (isdigit(c))
696 x = c - '0';
697 else if (islower(c))
698 x = 10 + c - 'a';
699 else
700 x = 10 + c - 'A';
701 x = x << 4;
702 c = Py_CHARMASK(*s);
703 s++;
704 if (isdigit(c))
705 x += c - '0';
706 else if (islower(c))
707 x += 10 + c - 'a';
708 else
709 x += 10 + c - 'A';
710 *p++ = x;
711 break;
712 }
713 if (!errors || strcmp(errors, "strict") == 0) {
714 PyErr_SetString(PyExc_ValueError,
715 "invalid \\x escape");
716 goto failed;
717 }
718 if (strcmp(errors, "replace") == 0) {
719 *p++ = '?';
720 } else if (strcmp(errors, "ignore") == 0)
721 /* do nothing */;
722 else {
723 PyErr_Format(PyExc_ValueError,
724 "decoding error; "
725 "unknown error handling code: %.400s",
726 errors);
727 goto failed;
728 }
729 /* skip \x */
730 if (s < end && isxdigit(Py_CHARMASK(s[0])))
731 s++; /* and a hexdigit */
732 break;
733 #ifndef Py_USING_UNICODE
734 case 'u':
735 case 'U':
736 case 'N':
737 if (unicode) {
738 PyErr_SetString(PyExc_ValueError,
739 "Unicode escapes not legal "
740 "when Unicode disabled");
741 goto failed;
742 }
743 #endif
744 default:
745 *p++ = '\\';
746 s--;
747 goto non_esc; /* an arbitrary number of unescaped
748 UTF-8 bytes may follow. */
749 }
750 }
751 if (p-buf < newlen)
752 _PyString_Resize(&v, p - buf); /* v is cleared on error */
753 return v;
754 failed:
755 Py_DECREF(v);
756 return NULL;
757 }
758
759 /* -------------------------------------------------------------------- */
760 /* object api */
761
762 static Py_ssize_t
string_getsize(register PyObject * op)763 string_getsize(register PyObject *op)
764 {
765 char *s;
766 Py_ssize_t len;
767 if (PyString_AsStringAndSize(op, &s, &len))
768 return -1;
769 return len;
770 }
771
772 static /*const*/ char *
string_getbuffer(register PyObject * op)773 string_getbuffer(register PyObject *op)
774 {
775 char *s;
776 Py_ssize_t len;
777 if (PyString_AsStringAndSize(op, &s, &len))
778 return NULL;
779 return s;
780 }
781
782 Py_ssize_t
PyString_Size(register PyObject * op)783 PyString_Size(register PyObject *op)
784 {
785 if (!PyString_Check(op))
786 return string_getsize(op);
787 return Py_SIZE(op);
788 }
789
790 /*const*/ char *
PyString_AsString(register PyObject * op)791 PyString_AsString(register PyObject *op)
792 {
793 if (!PyString_Check(op))
794 return string_getbuffer(op);
795 return ((PyStringObject *)op) -> ob_sval;
796 }
797
798 int
PyString_AsStringAndSize(register PyObject * obj,register char ** s,register Py_ssize_t * len)799 PyString_AsStringAndSize(register PyObject *obj,
800 register char **s,
801 register Py_ssize_t *len)
802 {
803 if (s == NULL) {
804 PyErr_BadInternalCall();
805 return -1;
806 }
807
808 if (!PyString_Check(obj)) {
809 #ifdef Py_USING_UNICODE
810 if (PyUnicode_Check(obj)) {
811 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
812 if (obj == NULL)
813 return -1;
814 }
815 else
816 #endif
817 {
818 PyErr_Format(PyExc_TypeError,
819 "expected string or Unicode object, "
820 "%.200s found", Py_TYPE(obj)->tp_name);
821 return -1;
822 }
823 }
824
825 *s = PyString_AS_STRING(obj);
826 if (len != NULL)
827 *len = PyString_GET_SIZE(obj);
828 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
829 PyErr_SetString(PyExc_TypeError,
830 "expected string without null bytes");
831 return -1;
832 }
833 return 0;
834 }
835
836 /* -------------------------------------------------------------------- */
837 /* Methods */
838
839 #include "stringlib/stringdefs.h"
840 #include "stringlib/fastsearch.h"
841
842 #include "stringlib/count.h"
843 #include "stringlib/find.h"
844 #include "stringlib/partition.h"
845 #include "stringlib/split.h"
846
847 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
848 #include "stringlib/localeutil.h"
849
850
851
852 static int
string_print(PyStringObject * op,FILE * fp,int flags)853 string_print(PyStringObject *op, FILE *fp, int flags)
854 {
855 Py_ssize_t i, str_len;
856 char c;
857 int quote;
858
859 /* XXX Ought to check for interrupts when writing long strings */
860 if (! PyString_CheckExact(op)) {
861 int ret;
862 /* A str subclass may have its own __str__ method. */
863 op = (PyStringObject *) PyObject_Str((PyObject *)op);
864 if (op == NULL)
865 return -1;
866 ret = string_print(op, fp, flags);
867 Py_DECREF(op);
868 return ret;
869 }
870 if (flags & Py_PRINT_RAW) {
871 char *data = op->ob_sval;
872 Py_ssize_t size = Py_SIZE(op);
873 Py_BEGIN_ALLOW_THREADS
874 while (size > INT_MAX) {
875 /* Very long strings cannot be written atomically.
876 * But don't write exactly INT_MAX bytes at a time
877 * to avoid memory aligment issues.
878 */
879 const int chunk_size = INT_MAX & ~0x3FFF;
880 fwrite(data, 1, chunk_size, fp);
881 data += chunk_size;
882 size -= chunk_size;
883 }
884 #ifdef __VMS
885 if (size) fwrite(data, (size_t)size, 1, fp);
886 #else
887 fwrite(data, 1, (size_t)size, fp);
888 #endif
889 Py_END_ALLOW_THREADS
890 return 0;
891 }
892
893 /* figure out which quote to use; single is preferred */
894 quote = '\'';
895 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
896 !memchr(op->ob_sval, '"', Py_SIZE(op)))
897 quote = '"';
898
899 str_len = Py_SIZE(op);
900 Py_BEGIN_ALLOW_THREADS
901 fputc(quote, fp);
902 for (i = 0; i < str_len; i++) {
903 /* Since strings are immutable and the caller should have a
904 reference, accessing the internal buffer should not be an issue
905 with the GIL released. */
906 c = op->ob_sval[i];
907 if (c == quote || c == '\\')
908 fprintf(fp, "\\%c", c);
909 else if (c == '\t')
910 fprintf(fp, "\\t");
911 else if (c == '\n')
912 fprintf(fp, "\\n");
913 else if (c == '\r')
914 fprintf(fp, "\\r");
915 else if (c < ' ' || c >= 0x7f)
916 fprintf(fp, "\\x%02x", c & 0xff);
917 else
918 fputc(c, fp);
919 }
920 fputc(quote, fp);
921 Py_END_ALLOW_THREADS
922 return 0;
923 }
924
925 PyObject *
PyString_Repr(PyObject * obj,int smartquotes)926 PyString_Repr(PyObject *obj, int smartquotes)
927 {
928 register PyStringObject* op = (PyStringObject*) obj;
929 size_t newsize;
930 PyObject *v;
931 if (Py_SIZE(op) > (PY_SSIZE_T_MAX - 2)/4) {
932 PyErr_SetString(PyExc_OverflowError,
933 "string is too large to make repr");
934 return NULL;
935 }
936 newsize = 2 + 4*Py_SIZE(op);
937 v = PyString_FromStringAndSize((char *)NULL, newsize);
938 if (v == NULL) {
939 return NULL;
940 }
941 else {
942 register Py_ssize_t i;
943 register char c;
944 register char *p;
945 int quote;
946
947 /* figure out which quote to use; single is preferred */
948 quote = '\'';
949 if (smartquotes &&
950 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
951 !memchr(op->ob_sval, '"', Py_SIZE(op)))
952 quote = '"';
953
954 p = PyString_AS_STRING(v);
955 *p++ = quote;
956 for (i = 0; i < Py_SIZE(op); i++) {
957 /* There's at least enough room for a hex escape
958 and a closing quote. */
959 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
960 c = op->ob_sval[i];
961 if (c == quote || c == '\\')
962 *p++ = '\\', *p++ = c;
963 else if (c == '\t')
964 *p++ = '\\', *p++ = 't';
965 else if (c == '\n')
966 *p++ = '\\', *p++ = 'n';
967 else if (c == '\r')
968 *p++ = '\\', *p++ = 'r';
969 else if (c < ' ' || c >= 0x7f) {
970 /* For performance, we don't want to call
971 PyOS_snprintf here (extra layers of
972 function call). */
973 sprintf(p, "\\x%02x", c & 0xff);
974 p += 4;
975 }
976 else
977 *p++ = c;
978 }
979 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
980 *p++ = quote;
981 *p = '\0';
982 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
983 return NULL;
984 return v;
985 }
986 }
987
988 static PyObject *
string_repr(PyObject * op)989 string_repr(PyObject *op)
990 {
991 return PyString_Repr(op, 1);
992 }
993
994 static PyObject *
string_str(PyObject * s)995 string_str(PyObject *s)
996 {
997 assert(PyString_Check(s));
998 if (PyString_CheckExact(s)) {
999 Py_INCREF(s);
1000 return s;
1001 }
1002 else {
1003 /* Subtype -- return genuine string with the same value. */
1004 PyStringObject *t = (PyStringObject *) s;
1005 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1006 }
1007 }
1008
1009 static Py_ssize_t
string_length(PyStringObject * a)1010 string_length(PyStringObject *a)
1011 {
1012 return Py_SIZE(a);
1013 }
1014
1015 static PyObject *
string_concat(register PyStringObject * a,register PyObject * bb)1016 string_concat(register PyStringObject *a, register PyObject *bb)
1017 {
1018 register Py_ssize_t size;
1019 register PyStringObject *op;
1020 if (!PyString_Check(bb)) {
1021 #ifdef Py_USING_UNICODE
1022 if (PyUnicode_Check(bb))
1023 return PyUnicode_Concat((PyObject *)a, bb);
1024 #endif
1025 if (PyByteArray_Check(bb))
1026 return PyByteArray_Concat((PyObject *)a, bb);
1027 PyErr_Format(PyExc_TypeError,
1028 "cannot concatenate 'str' and '%.200s' objects",
1029 Py_TYPE(bb)->tp_name);
1030 return NULL;
1031 }
1032 #define b ((PyStringObject *)bb)
1033 /* Optimize cases with empty left or right operand */
1034 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1035 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1036 if (Py_SIZE(a) == 0) {
1037 Py_INCREF(bb);
1038 return bb;
1039 }
1040 Py_INCREF(a);
1041 return (PyObject *)a;
1042 }
1043 /* Check that string sizes are not negative, to prevent an
1044 overflow in cases where we are passed incorrectly-created
1045 strings with negative lengths (due to a bug in other code).
1046 */
1047 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1048 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1049 PyErr_SetString(PyExc_OverflowError,
1050 "strings are too large to concat");
1051 return NULL;
1052 }
1053 size = Py_SIZE(a) + Py_SIZE(b);
1054
1055 /* Inline PyObject_NewVar */
1056 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1057 PyErr_SetString(PyExc_OverflowError,
1058 "strings are too large to concat");
1059 return NULL;
1060 }
1061 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1062 if (op == NULL)
1063 return PyErr_NoMemory();
1064 (void)PyObject_INIT_VAR(op, &PyString_Type, size);
1065 op->ob_shash = -1;
1066 op->ob_sstate = SSTATE_NOT_INTERNED;
1067 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1068 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1069 op->ob_sval[size] = '\0';
1070 return (PyObject *) op;
1071 #undef b
1072 }
1073
1074 static PyObject *
string_repeat(register PyStringObject * a,register Py_ssize_t n)1075 string_repeat(register PyStringObject *a, register Py_ssize_t n)
1076 {
1077 register Py_ssize_t i;
1078 register Py_ssize_t j;
1079 register Py_ssize_t size;
1080 register PyStringObject *op;
1081 size_t nbytes;
1082 if (n < 0)
1083 n = 0;
1084 /* watch out for overflows: the size can overflow Py_ssize_t,
1085 * and the # of bytes needed can overflow size_t
1086 */
1087 if (n && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1088 PyErr_SetString(PyExc_OverflowError,
1089 "repeated string is too long");
1090 return NULL;
1091 }
1092 size = Py_SIZE(a) * n;
1093 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1094 Py_INCREF(a);
1095 return (PyObject *)a;
1096 }
1097 nbytes = (size_t)size;
1098 if (nbytes + PyStringObject_SIZE <= nbytes) {
1099 PyErr_SetString(PyExc_OverflowError,
1100 "repeated string is too long");
1101 return NULL;
1102 }
1103 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1104 if (op == NULL)
1105 return PyErr_NoMemory();
1106 (void)PyObject_INIT_VAR(op, &PyString_Type, size);
1107 op->ob_shash = -1;
1108 op->ob_sstate = SSTATE_NOT_INTERNED;
1109 op->ob_sval[size] = '\0';
1110 if (Py_SIZE(a) == 1 && n > 0) {
1111 memset(op->ob_sval, a->ob_sval[0] , n);
1112 return (PyObject *) op;
1113 }
1114 i = 0;
1115 if (i < size) {
1116 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1117 i = Py_SIZE(a);
1118 }
1119 while (i < size) {
1120 j = (i <= size-i) ? i : size-i;
1121 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1122 i += j;
1123 }
1124 return (PyObject *) op;
1125 }
1126
1127 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1128
1129 static PyObject *
string_slice(register PyStringObject * a,register Py_ssize_t i,register Py_ssize_t j)1130 string_slice(register PyStringObject *a, register Py_ssize_t i,
1131 register Py_ssize_t j)
1132 /* j -- may be negative! */
1133 {
1134 if (i < 0)
1135 i = 0;
1136 if (j < 0)
1137 j = 0; /* Avoid signed/unsigned bug in next line */
1138 if (j > Py_SIZE(a))
1139 j = Py_SIZE(a);
1140 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1141 /* It's the same as a */
1142 Py_INCREF(a);
1143 return (PyObject *)a;
1144 }
1145 if (j < i)
1146 j = i;
1147 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1148 }
1149
1150 static int
string_contains(PyObject * str_obj,PyObject * sub_obj)1151 string_contains(PyObject *str_obj, PyObject *sub_obj)
1152 {
1153 if (!PyString_CheckExact(sub_obj)) {
1154 #ifdef Py_USING_UNICODE
1155 if (PyUnicode_Check(sub_obj))
1156 return PyUnicode_Contains(str_obj, sub_obj);
1157 #endif
1158 if (!PyString_Check(sub_obj)) {
1159 PyErr_Format(PyExc_TypeError,
1160 "'in <string>' requires string as left operand, "
1161 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1162 return -1;
1163 }
1164 }
1165
1166 return stringlib_contains_obj(str_obj, sub_obj);
1167 }
1168
1169 static PyObject *
string_item(PyStringObject * a,register Py_ssize_t i)1170 string_item(PyStringObject *a, register Py_ssize_t i)
1171 {
1172 char pchar;
1173 PyObject *v;
1174 if (i < 0 || i >= Py_SIZE(a)) {
1175 PyErr_SetString(PyExc_IndexError, "string index out of range");
1176 return NULL;
1177 }
1178 pchar = a->ob_sval[i];
1179 v = (PyObject *)characters[pchar & UCHAR_MAX];
1180 if (v == NULL)
1181 v = PyString_FromStringAndSize(&pchar, 1);
1182 else {
1183 #ifdef COUNT_ALLOCS
1184 one_strings++;
1185 #endif
1186 Py_INCREF(v);
1187 }
1188 return v;
1189 }
1190
1191 static PyObject*
string_richcompare(PyStringObject * a,PyStringObject * b,int op)1192 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1193 {
1194 int c;
1195 Py_ssize_t len_a, len_b;
1196 Py_ssize_t min_len;
1197 PyObject *result;
1198
1199 /* Make sure both arguments are strings. */
1200 if (!(PyString_Check(a) && PyString_Check(b))) {
1201 result = Py_NotImplemented;
1202 goto out;
1203 }
1204 if (a == b) {
1205 switch (op) {
1206 case Py_EQ:case Py_LE:case Py_GE:
1207 result = Py_True;
1208 goto out;
1209 case Py_NE:case Py_LT:case Py_GT:
1210 result = Py_False;
1211 goto out;
1212 }
1213 }
1214 if (op == Py_EQ) {
1215 /* Supporting Py_NE here as well does not save
1216 much time, since Py_NE is rarely used. */
1217 if (Py_SIZE(a) == Py_SIZE(b)
1218 && (a->ob_sval[0] == b->ob_sval[0]
1219 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1220 result = Py_True;
1221 } else {
1222 result = Py_False;
1223 }
1224 goto out;
1225 }
1226 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1227 min_len = (len_a < len_b) ? len_a : len_b;
1228 if (min_len > 0) {
1229 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1230 if (c==0)
1231 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1232 } else
1233 c = 0;
1234 if (c == 0)
1235 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1236 switch (op) {
1237 case Py_LT: c = c < 0; break;
1238 case Py_LE: c = c <= 0; break;
1239 case Py_EQ: assert(0); break; /* unreachable */
1240 case Py_NE: c = c != 0; break;
1241 case Py_GT: c = c > 0; break;
1242 case Py_GE: c = c >= 0; break;
1243 default:
1244 result = Py_NotImplemented;
1245 goto out;
1246 }
1247 result = c ? Py_True : Py_False;
1248 out:
1249 Py_INCREF(result);
1250 return result;
1251 }
1252
1253 int
_PyString_Eq(PyObject * o1,PyObject * o2)1254 _PyString_Eq(PyObject *o1, PyObject *o2)
1255 {
1256 PyStringObject *a = (PyStringObject*) o1;
1257 PyStringObject *b = (PyStringObject*) o2;
1258 return Py_SIZE(a) == Py_SIZE(b)
1259 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1260 }
1261
1262 static long
string_hash(PyStringObject * a)1263 string_hash(PyStringObject *a)
1264 {
1265 register Py_ssize_t len;
1266 register unsigned char *p;
1267 register long x;
1268
1269 #ifdef Py_DEBUG
1270 assert(_Py_HashSecret_Initialized);
1271 #endif
1272 if (a->ob_shash != -1)
1273 return a->ob_shash;
1274 len = Py_SIZE(a);
1275 /*
1276 We make the hash of the empty string be 0, rather than using
1277 (prefix ^ suffix), since this slightly obfuscates the hash secret
1278 */
1279 if (len == 0) {
1280 a->ob_shash = 0;
1281 return 0;
1282 }
1283 p = (unsigned char *) a->ob_sval;
1284 x = _Py_HashSecret.prefix;
1285 x ^= *p << 7;
1286 while (--len >= 0)
1287 x = (1000003*x) ^ *p++;
1288 x ^= Py_SIZE(a);
1289 x ^= _Py_HashSecret.suffix;
1290 if (x == -1)
1291 x = -2;
1292 a->ob_shash = x;
1293 return x;
1294 }
1295
1296 static PyObject*
string_subscript(PyStringObject * self,PyObject * item)1297 string_subscript(PyStringObject* self, PyObject* item)
1298 {
1299 if (PyIndex_Check(item)) {
1300 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1301 if (i == -1 && PyErr_Occurred())
1302 return NULL;
1303 if (i < 0)
1304 i += PyString_GET_SIZE(self);
1305 return string_item(self, i);
1306 }
1307 else if (PySlice_Check(item)) {
1308 Py_ssize_t start, stop, step, slicelength, cur, i;
1309 char* source_buf;
1310 char* result_buf;
1311 PyObject* result;
1312
1313 if (PySlice_GetIndicesEx((PySliceObject*)item,
1314 PyString_GET_SIZE(self),
1315 &start, &stop, &step, &slicelength) < 0) {
1316 return NULL;
1317 }
1318
1319 if (slicelength <= 0) {
1320 return PyString_FromStringAndSize("", 0);
1321 }
1322 else if (start == 0 && step == 1 &&
1323 slicelength == PyString_GET_SIZE(self) &&
1324 PyString_CheckExact(self)) {
1325 Py_INCREF(self);
1326 return (PyObject *)self;
1327 }
1328 else if (step == 1) {
1329 return PyString_FromStringAndSize(
1330 PyString_AS_STRING(self) + start,
1331 slicelength);
1332 }
1333 else {
1334 source_buf = PyString_AsString((PyObject*)self);
1335 result_buf = (char *)PyMem_Malloc(slicelength);
1336 if (result_buf == NULL)
1337 return PyErr_NoMemory();
1338
1339 for (cur = start, i = 0; i < slicelength;
1340 cur += step, i++) {
1341 result_buf[i] = source_buf[cur];
1342 }
1343
1344 result = PyString_FromStringAndSize(result_buf,
1345 slicelength);
1346 PyMem_Free(result_buf);
1347 return result;
1348 }
1349 }
1350 else {
1351 PyErr_Format(PyExc_TypeError,
1352 "string indices must be integers, not %.200s",
1353 Py_TYPE(item)->tp_name);
1354 return NULL;
1355 }
1356 }
1357
1358 static Py_ssize_t
string_buffer_getreadbuf(PyStringObject * self,Py_ssize_t index,const void ** ptr)1359 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1360 {
1361 if ( index != 0 ) {
1362 PyErr_SetString(PyExc_SystemError,
1363 "accessing non-existent string segment");
1364 return -1;
1365 }
1366 *ptr = (void *)self->ob_sval;
1367 return Py_SIZE(self);
1368 }
1369
1370 static Py_ssize_t
string_buffer_getwritebuf(PyStringObject * self,Py_ssize_t index,const void ** ptr)1371 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1372 {
1373 PyErr_SetString(PyExc_TypeError,
1374 "Cannot use string as modifiable buffer");
1375 return -1;
1376 }
1377
1378 static Py_ssize_t
string_buffer_getsegcount(PyStringObject * self,Py_ssize_t * lenp)1379 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1380 {
1381 if ( lenp )
1382 *lenp = Py_SIZE(self);
1383 return 1;
1384 }
1385
1386 static Py_ssize_t
string_buffer_getcharbuf(PyStringObject * self,Py_ssize_t index,const char ** ptr)1387 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1388 {
1389 if ( index != 0 ) {
1390 PyErr_SetString(PyExc_SystemError,
1391 "accessing non-existent string segment");
1392 return -1;
1393 }
1394 *ptr = self->ob_sval;
1395 return Py_SIZE(self);
1396 }
1397
1398 static int
string_buffer_getbuffer(PyStringObject * self,Py_buffer * view,int flags)1399 string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1400 {
1401 return PyBuffer_FillInfo(view, (PyObject*)self,
1402 (void *)self->ob_sval, Py_SIZE(self),
1403 1, flags);
1404 }
1405
/* Sequence protocol slots for the string type.  Item and slice
   assignment are left empty (0). */
static PySequenceMethods string_as_sequence = {
    (lenfunc)string_length, /*sq_length*/
    (binaryfunc)string_concat, /*sq_concat*/
    (ssizeargfunc)string_repeat, /*sq_repeat*/
    (ssizeargfunc)string_item, /*sq_item*/
    (ssizessizeargfunc)string_slice, /*sq_slice*/
    0,                  /*sq_ass_item*/
    0,                  /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};
1416
/* Mapping protocol slots for the string type: length and subscript
   lookup only; the assignment slot is left empty (0). */
static PyMappingMethods string_as_mapping = {
    (lenfunc)string_length,         /* mp_length */
    (binaryfunc)string_subscript,   /* mp_subscript */
    0,                              /* mp_ass_subscript */
};
1422
/* Buffer protocol slots for the string type.  The write-buffer entry
   is a function that always raises TypeError. */
static PyBufferProcs string_as_buffer = {
    (readbufferproc)string_buffer_getreadbuf,    /* bf_getreadbuffer */
    (writebufferproc)string_buffer_getwritebuf,  /* bf_getwritebuffer */
    (segcountproc)string_buffer_getsegcount,     /* bf_getsegcount */
    (charbufferproc)string_buffer_getcharbuf,    /* bf_getcharbuffer */
    (getbufferproc)string_buffer_getbuffer,      /* bf_getbuffer */
    0, /* bf_releasebuffer -- XXX */
};
1431
1432
1433
/* Strip modes shared by the strip/lstrip/rstrip helpers. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
/* PyArg_ParseTuple() format strings, one per strip mode. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for strip mode i: the format string minus its
   three-character "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
1442
1443 PyDoc_STRVAR(split__doc__,
1444 "S.split([sep [,maxsplit]]) -> list of strings\n\
1445 \n\
1446 Return a list of the words in the string S, using sep as the\n\
1447 delimiter string. If maxsplit is given, at most maxsplit\n\
1448 splits are done. If sep is not specified or is None, any\n\
1449 whitespace string is a separator and empty strings are removed\n\
1450 from the result.");
1451
1452 static PyObject *
string_split(PyStringObject * self,PyObject * args)1453 string_split(PyStringObject *self, PyObject *args)
1454 {
1455 Py_ssize_t len = PyString_GET_SIZE(self), n;
1456 Py_ssize_t maxsplit = -1;
1457 const char *s = PyString_AS_STRING(self), *sub;
1458 PyObject *subobj = Py_None;
1459
1460 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1461 return NULL;
1462 if (maxsplit < 0)
1463 maxsplit = PY_SSIZE_T_MAX;
1464 if (subobj == Py_None)
1465 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1466 if (PyString_Check(subobj)) {
1467 sub = PyString_AS_STRING(subobj);
1468 n = PyString_GET_SIZE(subobj);
1469 }
1470 #ifdef Py_USING_UNICODE
1471 else if (PyUnicode_Check(subobj))
1472 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1473 #endif
1474 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1475 return NULL;
1476
1477 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1478 }
1479
1480 PyDoc_STRVAR(partition__doc__,
1481 "S.partition(sep) -> (head, sep, tail)\n\
1482 \n\
1483 Search for the separator sep in S, and return the part before it,\n\
1484 the separator itself, and the part after it. If the separator is not\n\
1485 found, return S and two empty strings.");
1486
1487 static PyObject *
string_partition(PyStringObject * self,PyObject * sep_obj)1488 string_partition(PyStringObject *self, PyObject *sep_obj)
1489 {
1490 const char *sep;
1491 Py_ssize_t sep_len;
1492
1493 if (PyString_Check(sep_obj)) {
1494 sep = PyString_AS_STRING(sep_obj);
1495 sep_len = PyString_GET_SIZE(sep_obj);
1496 }
1497 #ifdef Py_USING_UNICODE
1498 else if (PyUnicode_Check(sep_obj))
1499 return PyUnicode_Partition((PyObject *) self, sep_obj);
1500 #endif
1501 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1502 return NULL;
1503
1504 return stringlib_partition(
1505 (PyObject*) self,
1506 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1507 sep_obj, sep, sep_len
1508 );
1509 }
1510
1511 PyDoc_STRVAR(rpartition__doc__,
1512 "S.rpartition(sep) -> (head, sep, tail)\n\
1513 \n\
1514 Search for the separator sep in S, starting at the end of S, and return\n\
1515 the part before it, the separator itself, and the part after it. If the\n\
1516 separator is not found, return two empty strings and S.");
1517
1518 static PyObject *
string_rpartition(PyStringObject * self,PyObject * sep_obj)1519 string_rpartition(PyStringObject *self, PyObject *sep_obj)
1520 {
1521 const char *sep;
1522 Py_ssize_t sep_len;
1523
1524 if (PyString_Check(sep_obj)) {
1525 sep = PyString_AS_STRING(sep_obj);
1526 sep_len = PyString_GET_SIZE(sep_obj);
1527 }
1528 #ifdef Py_USING_UNICODE
1529 else if (PyUnicode_Check(sep_obj))
1530 return PyUnicode_RPartition((PyObject *) self, sep_obj);
1531 #endif
1532 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1533 return NULL;
1534
1535 return stringlib_rpartition(
1536 (PyObject*) self,
1537 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1538 sep_obj, sep, sep_len
1539 );
1540 }
1541
1542 PyDoc_STRVAR(rsplit__doc__,
1543 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1544 \n\
1545 Return a list of the words in the string S, using sep as the\n\
1546 delimiter string, starting at the end of the string and working\n\
1547 to the front. If maxsplit is given, at most maxsplit splits are\n\
1548 done. If sep is not specified or is None, any whitespace string\n\
1549 is a separator.");
1550
1551 static PyObject *
string_rsplit(PyStringObject * self,PyObject * args)1552 string_rsplit(PyStringObject *self, PyObject *args)
1553 {
1554 Py_ssize_t len = PyString_GET_SIZE(self), n;
1555 Py_ssize_t maxsplit = -1;
1556 const char *s = PyString_AS_STRING(self), *sub;
1557 PyObject *subobj = Py_None;
1558
1559 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1560 return NULL;
1561 if (maxsplit < 0)
1562 maxsplit = PY_SSIZE_T_MAX;
1563 if (subobj == Py_None)
1564 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1565 if (PyString_Check(subobj)) {
1566 sub = PyString_AS_STRING(subobj);
1567 n = PyString_GET_SIZE(subobj);
1568 }
1569 #ifdef Py_USING_UNICODE
1570 else if (PyUnicode_Check(subobj))
1571 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1572 #endif
1573 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1574 return NULL;
1575
1576 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1577 }
1578
1579
1580 PyDoc_STRVAR(join__doc__,
1581 "S.join(iterable) -> string\n\
1582 \n\
1583 Return a string which is the concatenation of the strings in the\n\
1584 iterable. The separator between elements is S.");
1585
1586 static PyObject *
string_join(PyStringObject * self,PyObject * orig)1587 string_join(PyStringObject *self, PyObject *orig)
1588 {
1589 char *sep = PyString_AS_STRING(self);
1590 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1591 PyObject *res = NULL;
1592 char *p;
1593 Py_ssize_t seqlen = 0;
1594 size_t sz = 0;
1595 Py_ssize_t i;
1596 PyObject *seq, *item;
1597
1598 seq = PySequence_Fast(orig, "can only join an iterable");
1599 if (seq == NULL) {
1600 return NULL;
1601 }
1602
1603 seqlen = PySequence_Size(seq);
1604 if (seqlen == 0) {
1605 Py_DECREF(seq);
1606 return PyString_FromString("");
1607 }
1608 if (seqlen == 1) {
1609 item = PySequence_Fast_GET_ITEM(seq, 0);
1610 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1611 Py_INCREF(item);
1612 Py_DECREF(seq);
1613 return item;
1614 }
1615 }
1616
1617 /* There are at least two things to join, or else we have a subclass
1618 * of the builtin types in the sequence.
1619 * Do a pre-pass to figure out the total amount of space we'll
1620 * need (sz), see whether any argument is absurd, and defer to
1621 * the Unicode join if appropriate.
1622 */
1623 for (i = 0; i < seqlen; i++) {
1624 const size_t old_sz = sz;
1625 item = PySequence_Fast_GET_ITEM(seq, i);
1626 if (!PyString_Check(item)){
1627 #ifdef Py_USING_UNICODE
1628 if (PyUnicode_Check(item)) {
1629 /* Defer to Unicode join.
1630 * CAUTION: There's no guarantee that the
1631 * original sequence can be iterated over
1632 * again, so we must pass seq here.
1633 */
1634 PyObject *result;
1635 result = PyUnicode_Join((PyObject *)self, seq);
1636 Py_DECREF(seq);
1637 return result;
1638 }
1639 #endif
1640 PyErr_Format(PyExc_TypeError,
1641 "sequence item %zd: expected string,"
1642 " %.80s found",
1643 i, Py_TYPE(item)->tp_name);
1644 Py_DECREF(seq);
1645 return NULL;
1646 }
1647 sz += PyString_GET_SIZE(item);
1648 if (i != 0)
1649 sz += seplen;
1650 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1651 PyErr_SetString(PyExc_OverflowError,
1652 "join() result is too long for a Python string");
1653 Py_DECREF(seq);
1654 return NULL;
1655 }
1656 }
1657
1658 /* Allocate result space. */
1659 res = PyString_FromStringAndSize((char*)NULL, sz);
1660 if (res == NULL) {
1661 Py_DECREF(seq);
1662 return NULL;
1663 }
1664
1665 /* Catenate everything. */
1666 p = PyString_AS_STRING(res);
1667 for (i = 0; i < seqlen; ++i) {
1668 size_t n;
1669 item = PySequence_Fast_GET_ITEM(seq, i);
1670 n = PyString_GET_SIZE(item);
1671 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1672 p += n;
1673 if (i < seqlen - 1) {
1674 Py_MEMCPY(p, sep, seplen);
1675 p += seplen;
1676 }
1677 }
1678
1679 Py_DECREF(seq);
1680 return res;
1681 }
1682
1683 PyObject *
_PyString_Join(PyObject * sep,PyObject * x)1684 _PyString_Join(PyObject *sep, PyObject *x)
1685 {
1686 assert(sep != NULL && PyString_Check(sep));
1687 assert(x != NULL);
1688 return string_join((PyStringObject *)sep, x);
1689 }
1690
/* helper macro to fixup start/end slice values */
/* Modifies start and end in place: end is capped at len, negative
   values of either are offset by len and then floored at 0.  start is
   NOT capped at len, so start > end is possible afterwards.
   The expansion is a bare if/else sequence (not wrapped in
   do { } while (0)), so only use it as a full statement inside a
   braced block, and pass simple lvalues for start and end. */
#define ADJUST_INDICES(start, end, len)         \
    if (end > len)                          \
        end = len;                          \
    else if (end < 0) {                     \
        end += len;                         \
        if (end < 0)                        \
        end = 0;                        \
    }                                       \
    if (start < 0) {                        \
        start += len;                       \
        if (start < 0)                      \
        start = 0;                      \
    }
1705
1706 Py_LOCAL_INLINE(Py_ssize_t)
string_find_internal(PyStringObject * self,PyObject * args,int dir)1707 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1708 {
1709 PyObject *subobj;
1710 const char *sub;
1711 Py_ssize_t sub_len;
1712 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1713
1714 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1715 args, &subobj, &start, &end))
1716 return -2;
1717
1718 if (PyString_Check(subobj)) {
1719 sub = PyString_AS_STRING(subobj);
1720 sub_len = PyString_GET_SIZE(subobj);
1721 }
1722 #ifdef Py_USING_UNICODE
1723 else if (PyUnicode_Check(subobj))
1724 return PyUnicode_Find(
1725 (PyObject *)self, subobj, start, end, dir);
1726 #endif
1727 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1728 /* XXX - the "expected a character buffer object" is pretty
1729 confusing for a non-expert. remap to something else ? */
1730 return -2;
1731
1732 if (dir > 0)
1733 return stringlib_find_slice(
1734 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1735 sub, sub_len, start, end);
1736 else
1737 return stringlib_rfind_slice(
1738 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1739 sub, sub_len, start, end);
1740 }
1741
1742
1743 PyDoc_STRVAR(find__doc__,
1744 "S.find(sub [,start [,end]]) -> int\n\
1745 \n\
1746 Return the lowest index in S where substring sub is found,\n\
1747 such that sub is contained within S[start:end]. Optional\n\
1748 arguments start and end are interpreted as in slice notation.\n\
1749 \n\
1750 Return -1 on failure.");
1751
1752 static PyObject *
string_find(PyStringObject * self,PyObject * args)1753 string_find(PyStringObject *self, PyObject *args)
1754 {
1755 Py_ssize_t result = string_find_internal(self, args, +1);
1756 if (result == -2)
1757 return NULL;
1758 return PyInt_FromSsize_t(result);
1759 }
1760
1761
1762 PyDoc_STRVAR(index__doc__,
1763 "S.index(sub [,start [,end]]) -> int\n\
1764 \n\
1765 Like S.find() but raise ValueError when the substring is not found.");
1766
1767 static PyObject *
string_index(PyStringObject * self,PyObject * args)1768 string_index(PyStringObject *self, PyObject *args)
1769 {
1770 Py_ssize_t result = string_find_internal(self, args, +1);
1771 if (result == -2)
1772 return NULL;
1773 if (result == -1) {
1774 PyErr_SetString(PyExc_ValueError,
1775 "substring not found");
1776 return NULL;
1777 }
1778 return PyInt_FromSsize_t(result);
1779 }
1780
1781
1782 PyDoc_STRVAR(rfind__doc__,
1783 "S.rfind(sub [,start [,end]]) -> int\n\
1784 \n\
1785 Return the highest index in S where substring sub is found,\n\
1786 such that sub is contained within S[start:end]. Optional\n\
1787 arguments start and end are interpreted as in slice notation.\n\
1788 \n\
1789 Return -1 on failure.");
1790
1791 static PyObject *
string_rfind(PyStringObject * self,PyObject * args)1792 string_rfind(PyStringObject *self, PyObject *args)
1793 {
1794 Py_ssize_t result = string_find_internal(self, args, -1);
1795 if (result == -2)
1796 return NULL;
1797 return PyInt_FromSsize_t(result);
1798 }
1799
1800
1801 PyDoc_STRVAR(rindex__doc__,
1802 "S.rindex(sub [,start [,end]]) -> int\n\
1803 \n\
1804 Like S.rfind() but raise ValueError when the substring is not found.");
1805
1806 static PyObject *
string_rindex(PyStringObject * self,PyObject * args)1807 string_rindex(PyStringObject *self, PyObject *args)
1808 {
1809 Py_ssize_t result = string_find_internal(self, args, -1);
1810 if (result == -2)
1811 return NULL;
1812 if (result == -1) {
1813 PyErr_SetString(PyExc_ValueError,
1814 "substring not found");
1815 return NULL;
1816 }
1817 return PyInt_FromSsize_t(result);
1818 }
1819
1820
1821 Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyStringObject * self,int striptype,PyObject * sepobj)1822 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1823 {
1824 char *s = PyString_AS_STRING(self);
1825 Py_ssize_t len = PyString_GET_SIZE(self);
1826 char *sep = PyString_AS_STRING(sepobj);
1827 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1828 Py_ssize_t i, j;
1829
1830 i = 0;
1831 if (striptype != RIGHTSTRIP) {
1832 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1833 i++;
1834 }
1835 }
1836
1837 j = len;
1838 if (striptype != LEFTSTRIP) {
1839 do {
1840 j--;
1841 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1842 j++;
1843 }
1844
1845 if (i == 0 && j == len && PyString_CheckExact(self)) {
1846 Py_INCREF(self);
1847 return (PyObject*)self;
1848 }
1849 else
1850 return PyString_FromStringAndSize(s+i, j-i);
1851 }
1852
1853
1854 Py_LOCAL_INLINE(PyObject *)
do_strip(PyStringObject * self,int striptype)1855 do_strip(PyStringObject *self, int striptype)
1856 {
1857 char *s = PyString_AS_STRING(self);
1858 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
1859
1860 i = 0;
1861 if (striptype != RIGHTSTRIP) {
1862 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1863 i++;
1864 }
1865 }
1866
1867 j = len;
1868 if (striptype != LEFTSTRIP) {
1869 do {
1870 j--;
1871 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1872 j++;
1873 }
1874
1875 if (i == 0 && j == len && PyString_CheckExact(self)) {
1876 Py_INCREF(self);
1877 return (PyObject*)self;
1878 }
1879 else
1880 return PyString_FromStringAndSize(s+i, j-i);
1881 }
1882
1883
1884 Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyStringObject * self,int striptype,PyObject * args)1885 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1886 {
1887 PyObject *sep = NULL;
1888
1889 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1890 return NULL;
1891
1892 if (sep != NULL && sep != Py_None) {
1893 if (PyString_Check(sep))
1894 return do_xstrip(self, striptype, sep);
1895 #ifdef Py_USING_UNICODE
1896 else if (PyUnicode_Check(sep)) {
1897 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1898 PyObject *res;
1899 if (uniself==NULL)
1900 return NULL;
1901 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1902 striptype, sep);
1903 Py_DECREF(uniself);
1904 return res;
1905 }
1906 #endif
1907 PyErr_Format(PyExc_TypeError,
1908 #ifdef Py_USING_UNICODE
1909 "%s arg must be None, str or unicode",
1910 #else
1911 "%s arg must be None or str",
1912 #endif
1913 STRIPNAME(striptype));
1914 return NULL;
1915 }
1916
1917 return do_strip(self, striptype);
1918 }
1919
1920
1921 PyDoc_STRVAR(strip__doc__,
1922 "S.strip([chars]) -> string or unicode\n\
1923 \n\
1924 Return a copy of the string S with leading and trailing\n\
1925 whitespace removed.\n\
1926 If chars is given and not None, remove characters in chars instead.\n\
1927 If chars is unicode, S will be converted to unicode before stripping");
1928
1929 static PyObject *
string_strip(PyStringObject * self,PyObject * args)1930 string_strip(PyStringObject *self, PyObject *args)
1931 {
1932 if (PyTuple_GET_SIZE(args) == 0)
1933 return do_strip(self, BOTHSTRIP); /* Common case */
1934 else
1935 return do_argstrip(self, BOTHSTRIP, args);
1936 }
1937
1938
1939 PyDoc_STRVAR(lstrip__doc__,
1940 "S.lstrip([chars]) -> string or unicode\n\
1941 \n\
1942 Return a copy of the string S with leading whitespace removed.\n\
1943 If chars is given and not None, remove characters in chars instead.\n\
1944 If chars is unicode, S will be converted to unicode before stripping");
1945
1946 static PyObject *
string_lstrip(PyStringObject * self,PyObject * args)1947 string_lstrip(PyStringObject *self, PyObject *args)
1948 {
1949 if (PyTuple_GET_SIZE(args) == 0)
1950 return do_strip(self, LEFTSTRIP); /* Common case */
1951 else
1952 return do_argstrip(self, LEFTSTRIP, args);
1953 }
1954
1955
1956 PyDoc_STRVAR(rstrip__doc__,
1957 "S.rstrip([chars]) -> string or unicode\n\
1958 \n\
1959 Return a copy of the string S with trailing whitespace removed.\n\
1960 If chars is given and not None, remove characters in chars instead.\n\
1961 If chars is unicode, S will be converted to unicode before stripping");
1962
1963 static PyObject *
string_rstrip(PyStringObject * self,PyObject * args)1964 string_rstrip(PyStringObject *self, PyObject *args)
1965 {
1966 if (PyTuple_GET_SIZE(args) == 0)
1967 return do_strip(self, RIGHTSTRIP); /* Common case */
1968 else
1969 return do_argstrip(self, RIGHTSTRIP, args);
1970 }
1971
1972
1973 PyDoc_STRVAR(lower__doc__,
1974 "S.lower() -> string\n\
1975 \n\
1976 Return a copy of the string S converted to lowercase.");
1977
1978 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1979 #ifndef _tolower
1980 #define _tolower tolower
1981 #endif
1982
1983 static PyObject *
string_lower(PyStringObject * self)1984 string_lower(PyStringObject *self)
1985 {
1986 char *s;
1987 Py_ssize_t i, n = PyString_GET_SIZE(self);
1988 PyObject *newobj;
1989
1990 newobj = PyString_FromStringAndSize(NULL, n);
1991 if (!newobj)
1992 return NULL;
1993
1994 s = PyString_AS_STRING(newobj);
1995
1996 Py_MEMCPY(s, PyString_AS_STRING(self), n);
1997
1998 for (i = 0; i < n; i++) {
1999 int c = Py_CHARMASK(s[i]);
2000 if (isupper(c))
2001 s[i] = _tolower(c);
2002 }
2003
2004 return newobj;
2005 }
2006
2007 PyDoc_STRVAR(upper__doc__,
2008 "S.upper() -> string\n\
2009 \n\
2010 Return a copy of the string S converted to uppercase.");
2011
2012 #ifndef _toupper
2013 #define _toupper toupper
2014 #endif
2015
2016 static PyObject *
string_upper(PyStringObject * self)2017 string_upper(PyStringObject *self)
2018 {
2019 char *s;
2020 Py_ssize_t i, n = PyString_GET_SIZE(self);
2021 PyObject *newobj;
2022
2023 newobj = PyString_FromStringAndSize(NULL, n);
2024 if (!newobj)
2025 return NULL;
2026
2027 s = PyString_AS_STRING(newobj);
2028
2029 Py_MEMCPY(s, PyString_AS_STRING(self), n);
2030
2031 for (i = 0; i < n; i++) {
2032 int c = Py_CHARMASK(s[i]);
2033 if (islower(c))
2034 s[i] = _toupper(c);
2035 }
2036
2037 return newobj;
2038 }
2039
2040 PyDoc_STRVAR(title__doc__,
2041 "S.title() -> string\n\
2042 \n\
2043 Return a titlecased version of S, i.e. words start with uppercase\n\
2044 characters, all remaining cased characters have lowercase.");
2045
2046 static PyObject*
string_title(PyStringObject * self)2047 string_title(PyStringObject *self)
2048 {
2049 char *s = PyString_AS_STRING(self), *s_new;
2050 Py_ssize_t i, n = PyString_GET_SIZE(self);
2051 int previous_is_cased = 0;
2052 PyObject *newobj;
2053
2054 newobj = PyString_FromStringAndSize(NULL, n);
2055 if (newobj == NULL)
2056 return NULL;
2057 s_new = PyString_AsString(newobj);
2058 for (i = 0; i < n; i++) {
2059 int c = Py_CHARMASK(*s++);
2060 if (islower(c)) {
2061 if (!previous_is_cased)
2062 c = toupper(c);
2063 previous_is_cased = 1;
2064 } else if (isupper(c)) {
2065 if (previous_is_cased)
2066 c = tolower(c);
2067 previous_is_cased = 1;
2068 } else
2069 previous_is_cased = 0;
2070 *s_new++ = c;
2071 }
2072 return newobj;
2073 }
2074
2075 PyDoc_STRVAR(capitalize__doc__,
2076 "S.capitalize() -> string\n\
2077 \n\
2078 Return a copy of the string S with only its first character\n\
2079 capitalized.");
2080
2081 static PyObject *
string_capitalize(PyStringObject * self)2082 string_capitalize(PyStringObject *self)
2083 {
2084 char *s = PyString_AS_STRING(self), *s_new;
2085 Py_ssize_t i, n = PyString_GET_SIZE(self);
2086 PyObject *newobj;
2087
2088 newobj = PyString_FromStringAndSize(NULL, n);
2089 if (newobj == NULL)
2090 return NULL;
2091 s_new = PyString_AsString(newobj);
2092 if (0 < n) {
2093 int c = Py_CHARMASK(*s++);
2094 if (islower(c))
2095 *s_new = toupper(c);
2096 else
2097 *s_new = c;
2098 s_new++;
2099 }
2100 for (i = 1; i < n; i++) {
2101 int c = Py_CHARMASK(*s++);
2102 if (isupper(c))
2103 *s_new = tolower(c);
2104 else
2105 *s_new = c;
2106 s_new++;
2107 }
2108 return newobj;
2109 }
2110
2111
2112 PyDoc_STRVAR(count__doc__,
2113 "S.count(sub[, start[, end]]) -> int\n\
2114 \n\
2115 Return the number of non-overlapping occurrences of substring sub in\n\
2116 string S[start:end]. Optional arguments start and end are interpreted\n\
2117 as in slice notation.");
2118
2119 static PyObject *
string_count(PyStringObject * self,PyObject * args)2120 string_count(PyStringObject *self, PyObject *args)
2121 {
2122 PyObject *sub_obj;
2123 const char *str = PyString_AS_STRING(self), *sub;
2124 Py_ssize_t sub_len;
2125 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2126
2127 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
2128 return NULL;
2129
2130 if (PyString_Check(sub_obj)) {
2131 sub = PyString_AS_STRING(sub_obj);
2132 sub_len = PyString_GET_SIZE(sub_obj);
2133 }
2134 #ifdef Py_USING_UNICODE
2135 else if (PyUnicode_Check(sub_obj)) {
2136 Py_ssize_t count;
2137 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2138 if (count == -1)
2139 return NULL;
2140 else
2141 return PyInt_FromSsize_t(count);
2142 }
2143 #endif
2144 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2145 return NULL;
2146
2147 ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
2148
2149 return PyInt_FromSsize_t(
2150 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2151 );
2152 }
2153
2154 PyDoc_STRVAR(swapcase__doc__,
2155 "S.swapcase() -> string\n\
2156 \n\
2157 Return a copy of the string S with uppercase characters\n\
2158 converted to lowercase and vice versa.");
2159
2160 static PyObject *
string_swapcase(PyStringObject * self)2161 string_swapcase(PyStringObject *self)
2162 {
2163 char *s = PyString_AS_STRING(self), *s_new;
2164 Py_ssize_t i, n = PyString_GET_SIZE(self);
2165 PyObject *newobj;
2166
2167 newobj = PyString_FromStringAndSize(NULL, n);
2168 if (newobj == NULL)
2169 return NULL;
2170 s_new = PyString_AsString(newobj);
2171 for (i = 0; i < n; i++) {
2172 int c = Py_CHARMASK(*s++);
2173 if (islower(c)) {
2174 *s_new = toupper(c);
2175 }
2176 else if (isupper(c)) {
2177 *s_new = tolower(c);
2178 }
2179 else
2180 *s_new = c;
2181 s_new++;
2182 }
2183 return newobj;
2184 }
2185
2186
2187 PyDoc_STRVAR(translate__doc__,
2188 "S.translate(table [,deletechars]) -> string\n\
2189 \n\
2190 Return a copy of the string S, where all characters occurring\n\
2191 in the optional argument deletechars are removed, and the\n\
2192 remaining characters have been mapped through the given\n\
2193 translation table, which must be a string of length 256 or None.\n\
2194 If the table argument is None, no translation is applied and\n\
2195 the operation simply removes the characters in deletechars.");
2196
2197 static PyObject *
string_translate(PyStringObject * self,PyObject * args)2198 string_translate(PyStringObject *self, PyObject *args)
2199 {
2200 register char *input, *output;
2201 const char *table;
2202 register Py_ssize_t i, c, changed = 0;
2203 PyObject *input_obj = (PyObject*)self;
2204 const char *output_start, *del_table=NULL;
2205 Py_ssize_t inlen, tablen, dellen = 0;
2206 PyObject *result;
2207 int trans_table[256];
2208 PyObject *tableobj, *delobj = NULL;
2209
2210 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2211 &tableobj, &delobj))
2212 return NULL;
2213
2214 if (PyString_Check(tableobj)) {
2215 table = PyString_AS_STRING(tableobj);
2216 tablen = PyString_GET_SIZE(tableobj);
2217 }
2218 else if (tableobj == Py_None) {
2219 table = NULL;
2220 tablen = 256;
2221 }
2222 #ifdef Py_USING_UNICODE
2223 else if (PyUnicode_Check(tableobj)) {
2224 /* Unicode .translate() does not support the deletechars
2225 parameter; instead a mapping to None will cause characters
2226 to be deleted. */
2227 if (delobj != NULL) {
2228 PyErr_SetString(PyExc_TypeError,
2229 "deletions are implemented differently for unicode");
2230 return NULL;
2231 }
2232 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2233 }
2234 #endif
2235 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2236 return NULL;
2237
2238 if (tablen != 256) {
2239 PyErr_SetString(PyExc_ValueError,
2240 "translation table must be 256 characters long");
2241 return NULL;
2242 }
2243
2244 if (delobj != NULL) {
2245 if (PyString_Check(delobj)) {
2246 del_table = PyString_AS_STRING(delobj);
2247 dellen = PyString_GET_SIZE(delobj);
2248 }
2249 #ifdef Py_USING_UNICODE
2250 else if (PyUnicode_Check(delobj)) {
2251 PyErr_SetString(PyExc_TypeError,
2252 "deletions are implemented differently for unicode");
2253 return NULL;
2254 }
2255 #endif
2256 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2257 return NULL;
2258 }
2259 else {
2260 del_table = NULL;
2261 dellen = 0;
2262 }
2263
2264 inlen = PyString_GET_SIZE(input_obj);
2265 result = PyString_FromStringAndSize((char *)NULL, inlen);
2266 if (result == NULL)
2267 return NULL;
2268 output_start = output = PyString_AsString(result);
2269 input = PyString_AS_STRING(input_obj);
2270
2271 if (dellen == 0 && table != NULL) {
2272 /* If no deletions are required, use faster code */
2273 for (i = inlen; --i >= 0; ) {
2274 c = Py_CHARMASK(*input++);
2275 if (Py_CHARMASK((*output++ = table[c])) != c)
2276 changed = 1;
2277 }
2278 if (changed || !PyString_CheckExact(input_obj))
2279 return result;
2280 Py_DECREF(result);
2281 Py_INCREF(input_obj);
2282 return input_obj;
2283 }
2284
2285 if (table == NULL) {
2286 for (i = 0; i < 256; i++)
2287 trans_table[i] = Py_CHARMASK(i);
2288 } else {
2289 for (i = 0; i < 256; i++)
2290 trans_table[i] = Py_CHARMASK(table[i]);
2291 }
2292
2293 for (i = 0; i < dellen; i++)
2294 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2295
2296 for (i = inlen; --i >= 0; ) {
2297 c = Py_CHARMASK(*input++);
2298 if (trans_table[c] != -1)
2299 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2300 continue;
2301 changed = 1;
2302 }
2303 if (!changed && PyString_CheckExact(input_obj)) {
2304 Py_DECREF(result);
2305 Py_INCREF(input_obj);
2306 return input_obj;
2307 }
2308 /* Fix the size of the resulting string */
2309 if (inlen > 0 && _PyString_Resize(&result, output - output_start))
2310 return NULL;
2311 return result;
2312 }
2313
2314
2315 /* find and count characters and substrings */
2316
/* Search the first `target_len` bytes at `target` for the byte `c`.
   Expands to a memchr() call whose result is cast to `char *`: a pointer to
   the first matching byte, or NULL when the byte does not occur. */
#define findchar(target, target_len, c)                                 \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2319
2320 /* String ops must return a string. */
2321 /* If the object is subclass of string, create a copy */
2322 Py_LOCAL(PyStringObject *)
return_self(PyStringObject * self)2323 return_self(PyStringObject *self)
2324 {
2325 if (PyString_CheckExact(self)) {
2326 Py_INCREF(self);
2327 return self;
2328 }
2329 return (PyStringObject *)PyString_FromStringAndSize(
2330 PyString_AS_STRING(self),
2331 PyString_GET_SIZE(self));
2332 }
2333
2334 Py_LOCAL_INLINE(Py_ssize_t)
countchar(const char * target,Py_ssize_t target_len,char c,Py_ssize_t maxcount)2335 countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
2336 {
2337 Py_ssize_t count=0;
2338 const char *start=target;
2339 const char *end=target+target_len;
2340
2341 while ( (start=findchar(start, end-start, c)) != NULL ) {
2342 count++;
2343 if (count >= maxcount)
2344 break;
2345 start += 1;
2346 }
2347 return count;
2348 }
2349
2350
2351 /* Algorithms for different cases of string replacement */
2352
2353 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2354 Py_LOCAL(PyStringObject *)
replace_interleave(PyStringObject * self,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2355 replace_interleave(PyStringObject *self,
2356 const char *to_s, Py_ssize_t to_len,
2357 Py_ssize_t maxcount)
2358 {
2359 char *self_s, *result_s;
2360 Py_ssize_t self_len, result_len;
2361 Py_ssize_t count, i, product;
2362 PyStringObject *result;
2363
2364 self_len = PyString_GET_SIZE(self);
2365
2366 /* 1 at the end plus 1 after every character */
2367 count = self_len+1;
2368 if (maxcount < count)
2369 count = maxcount;
2370
2371 /* Check for overflow */
2372 /* result_len = count * to_len + self_len; */
2373 product = count * to_len;
2374 if (product / to_len != count) {
2375 PyErr_SetString(PyExc_OverflowError,
2376 "replace string is too long");
2377 return NULL;
2378 }
2379 result_len = product + self_len;
2380 if (result_len < 0) {
2381 PyErr_SetString(PyExc_OverflowError,
2382 "replace string is too long");
2383 return NULL;
2384 }
2385
2386 if (! (result = (PyStringObject *)
2387 PyString_FromStringAndSize(NULL, result_len)) )
2388 return NULL;
2389
2390 self_s = PyString_AS_STRING(self);
2391 result_s = PyString_AS_STRING(result);
2392
2393 /* TODO: special case single character, which doesn't need memcpy */
2394
2395 /* Lay the first one down (guaranteed this will occur) */
2396 Py_MEMCPY(result_s, to_s, to_len);
2397 result_s += to_len;
2398 count -= 1;
2399
2400 for (i=0; i<count; i++) {
2401 *result_s++ = *self_s++;
2402 Py_MEMCPY(result_s, to_s, to_len);
2403 result_s += to_len;
2404 }
2405
2406 /* Copy the rest of the original string */
2407 Py_MEMCPY(result_s, self_s, self_len-i);
2408
2409 return result;
2410 }
2411
2412 /* Special case for deleting a single character */
2413 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2414 Py_LOCAL(PyStringObject *)
replace_delete_single_character(PyStringObject * self,char from_c,Py_ssize_t maxcount)2415 replace_delete_single_character(PyStringObject *self,
2416 char from_c, Py_ssize_t maxcount)
2417 {
2418 char *self_s, *result_s;
2419 char *start, *next, *end;
2420 Py_ssize_t self_len, result_len;
2421 Py_ssize_t count;
2422 PyStringObject *result;
2423
2424 self_len = PyString_GET_SIZE(self);
2425 self_s = PyString_AS_STRING(self);
2426
2427 count = countchar(self_s, self_len, from_c, maxcount);
2428 if (count == 0) {
2429 return return_self(self);
2430 }
2431
2432 result_len = self_len - count; /* from_len == 1 */
2433 assert(result_len>=0);
2434
2435 if ( (result = (PyStringObject *)
2436 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2437 return NULL;
2438 result_s = PyString_AS_STRING(result);
2439
2440 start = self_s;
2441 end = self_s + self_len;
2442 while (count-- > 0) {
2443 next = findchar(start, end-start, from_c);
2444 if (next == NULL)
2445 break;
2446 Py_MEMCPY(result_s, start, next-start);
2447 result_s += (next-start);
2448 start = next+1;
2449 }
2450 Py_MEMCPY(result_s, start, end-start);
2451
2452 return result;
2453 }
2454
2455 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2456
2457 Py_LOCAL(PyStringObject *)
replace_delete_substring(PyStringObject * self,const char * from_s,Py_ssize_t from_len,Py_ssize_t maxcount)2458 replace_delete_substring(PyStringObject *self,
2459 const char *from_s, Py_ssize_t from_len,
2460 Py_ssize_t maxcount) {
2461 char *self_s, *result_s;
2462 char *start, *next, *end;
2463 Py_ssize_t self_len, result_len;
2464 Py_ssize_t count, offset;
2465 PyStringObject *result;
2466
2467 self_len = PyString_GET_SIZE(self);
2468 self_s = PyString_AS_STRING(self);
2469
2470 count = stringlib_count(self_s, self_len,
2471 from_s, from_len,
2472 maxcount);
2473
2474 if (count == 0) {
2475 /* no matches */
2476 return return_self(self);
2477 }
2478
2479 result_len = self_len - (count * from_len);
2480 assert (result_len>=0);
2481
2482 if ( (result = (PyStringObject *)
2483 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2484 return NULL;
2485
2486 result_s = PyString_AS_STRING(result);
2487
2488 start = self_s;
2489 end = self_s + self_len;
2490 while (count-- > 0) {
2491 offset = stringlib_find(start, end-start,
2492 from_s, from_len,
2493 0);
2494 if (offset == -1)
2495 break;
2496 next = start + offset;
2497
2498 Py_MEMCPY(result_s, start, next-start);
2499
2500 result_s += (next-start);
2501 start = next+from_len;
2502 }
2503 Py_MEMCPY(result_s, start, end-start);
2504 return result;
2505 }
2506
2507 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2508 Py_LOCAL(PyStringObject *)
replace_single_character_in_place(PyStringObject * self,char from_c,char to_c,Py_ssize_t maxcount)2509 replace_single_character_in_place(PyStringObject *self,
2510 char from_c, char to_c,
2511 Py_ssize_t maxcount)
2512 {
2513 char *self_s, *result_s, *start, *end, *next;
2514 Py_ssize_t self_len;
2515 PyStringObject *result;
2516
2517 /* The result string will be the same size */
2518 self_s = PyString_AS_STRING(self);
2519 self_len = PyString_GET_SIZE(self);
2520
2521 next = findchar(self_s, self_len, from_c);
2522
2523 if (next == NULL) {
2524 /* No matches; return the original string */
2525 return return_self(self);
2526 }
2527
2528 /* Need to make a new string */
2529 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2530 if (result == NULL)
2531 return NULL;
2532 result_s = PyString_AS_STRING(result);
2533 Py_MEMCPY(result_s, self_s, self_len);
2534
2535 /* change everything in-place, starting with this one */
2536 start = result_s + (next-self_s);
2537 *start = to_c;
2538 start++;
2539 end = result_s + self_len;
2540
2541 while (--maxcount > 0) {
2542 next = findchar(start, end-start, from_c);
2543 if (next == NULL)
2544 break;
2545 *next = to_c;
2546 start = next+1;
2547 }
2548
2549 return result;
2550 }
2551
2552 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2553 Py_LOCAL(PyStringObject *)
replace_substring_in_place(PyStringObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2554 replace_substring_in_place(PyStringObject *self,
2555 const char *from_s, Py_ssize_t from_len,
2556 const char *to_s, Py_ssize_t to_len,
2557 Py_ssize_t maxcount)
2558 {
2559 char *result_s, *start, *end;
2560 char *self_s;
2561 Py_ssize_t self_len, offset;
2562 PyStringObject *result;
2563
2564 /* The result string will be the same size */
2565
2566 self_s = PyString_AS_STRING(self);
2567 self_len = PyString_GET_SIZE(self);
2568
2569 offset = stringlib_find(self_s, self_len,
2570 from_s, from_len,
2571 0);
2572 if (offset == -1) {
2573 /* No matches; return the original string */
2574 return return_self(self);
2575 }
2576
2577 /* Need to make a new string */
2578 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2579 if (result == NULL)
2580 return NULL;
2581 result_s = PyString_AS_STRING(result);
2582 Py_MEMCPY(result_s, self_s, self_len);
2583
2584 /* change everything in-place, starting with this one */
2585 start = result_s + offset;
2586 Py_MEMCPY(start, to_s, from_len);
2587 start += from_len;
2588 end = result_s + self_len;
2589
2590 while ( --maxcount > 0) {
2591 offset = stringlib_find(start, end-start,
2592 from_s, from_len,
2593 0);
2594 if (offset==-1)
2595 break;
2596 Py_MEMCPY(start+offset, to_s, from_len);
2597 start += offset+from_len;
2598 }
2599
2600 return result;
2601 }
2602
2603 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2604 Py_LOCAL(PyStringObject *)
replace_single_character(PyStringObject * self,char from_c,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2605 replace_single_character(PyStringObject *self,
2606 char from_c,
2607 const char *to_s, Py_ssize_t to_len,
2608 Py_ssize_t maxcount)
2609 {
2610 char *self_s, *result_s;
2611 char *start, *next, *end;
2612 Py_ssize_t self_len, result_len;
2613 Py_ssize_t count, product;
2614 PyStringObject *result;
2615
2616 self_s = PyString_AS_STRING(self);
2617 self_len = PyString_GET_SIZE(self);
2618
2619 count = countchar(self_s, self_len, from_c, maxcount);
2620 if (count == 0) {
2621 /* no matches, return unchanged */
2622 return return_self(self);
2623 }
2624
2625 /* use the difference between current and new, hence the "-1" */
2626 /* result_len = self_len + count * (to_len-1) */
2627 product = count * (to_len-1);
2628 if (product / (to_len-1) != count) {
2629 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2630 return NULL;
2631 }
2632 result_len = self_len + product;
2633 if (result_len < 0) {
2634 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2635 return NULL;
2636 }
2637
2638 if ( (result = (PyStringObject *)
2639 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2640 return NULL;
2641 result_s = PyString_AS_STRING(result);
2642
2643 start = self_s;
2644 end = self_s + self_len;
2645 while (count-- > 0) {
2646 next = findchar(start, end-start, from_c);
2647 if (next == NULL)
2648 break;
2649
2650 if (next == start) {
2651 /* replace with the 'to' */
2652 Py_MEMCPY(result_s, to_s, to_len);
2653 result_s += to_len;
2654 start += 1;
2655 } else {
2656 /* copy the unchanged old then the 'to' */
2657 Py_MEMCPY(result_s, start, next-start);
2658 result_s += (next-start);
2659 Py_MEMCPY(result_s, to_s, to_len);
2660 result_s += to_len;
2661 start = next+1;
2662 }
2663 }
2664 /* Copy the remainder of the remaining string */
2665 Py_MEMCPY(result_s, start, end-start);
2666
2667 return result;
2668 }
2669
2670 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2671 Py_LOCAL(PyStringObject *)
replace_substring(PyStringObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2672 replace_substring(PyStringObject *self,
2673 const char *from_s, Py_ssize_t from_len,
2674 const char *to_s, Py_ssize_t to_len,
2675 Py_ssize_t maxcount) {
2676 char *self_s, *result_s;
2677 char *start, *next, *end;
2678 Py_ssize_t self_len, result_len;
2679 Py_ssize_t count, offset, product;
2680 PyStringObject *result;
2681
2682 self_s = PyString_AS_STRING(self);
2683 self_len = PyString_GET_SIZE(self);
2684
2685 count = stringlib_count(self_s, self_len,
2686 from_s, from_len,
2687 maxcount);
2688
2689 if (count == 0) {
2690 /* no matches, return unchanged */
2691 return return_self(self);
2692 }
2693
2694 /* Check for overflow */
2695 /* result_len = self_len + count * (to_len-from_len) */
2696 product = count * (to_len-from_len);
2697 if (product / (to_len-from_len) != count) {
2698 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2699 return NULL;
2700 }
2701 result_len = self_len + product;
2702 if (result_len < 0) {
2703 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2704 return NULL;
2705 }
2706
2707 if ( (result = (PyStringObject *)
2708 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2709 return NULL;
2710 result_s = PyString_AS_STRING(result);
2711
2712 start = self_s;
2713 end = self_s + self_len;
2714 while (count-- > 0) {
2715 offset = stringlib_find(start, end-start,
2716 from_s, from_len,
2717 0);
2718 if (offset == -1)
2719 break;
2720 next = start+offset;
2721 if (next == start) {
2722 /* replace with the 'to' */
2723 Py_MEMCPY(result_s, to_s, to_len);
2724 result_s += to_len;
2725 start += from_len;
2726 } else {
2727 /* copy the unchanged old then the 'to' */
2728 Py_MEMCPY(result_s, start, next-start);
2729 result_s += (next-start);
2730 Py_MEMCPY(result_s, to_s, to_len);
2731 result_s += to_len;
2732 start = next+from_len;
2733 }
2734 }
2735 /* Copy the remainder of the remaining string */
2736 Py_MEMCPY(result_s, start, end-start);
2737
2738 return result;
2739 }
2740
2741
2742 Py_LOCAL(PyStringObject *)
replace(PyStringObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2743 replace(PyStringObject *self,
2744 const char *from_s, Py_ssize_t from_len,
2745 const char *to_s, Py_ssize_t to_len,
2746 Py_ssize_t maxcount)
2747 {
2748 if (maxcount < 0) {
2749 maxcount = PY_SSIZE_T_MAX;
2750 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2751 /* nothing to do; return the original string */
2752 return return_self(self);
2753 }
2754
2755 if (maxcount == 0 ||
2756 (from_len == 0 && to_len == 0)) {
2757 /* nothing to do; return the original string */
2758 return return_self(self);
2759 }
2760
2761 /* Handle zero-length special cases */
2762
2763 if (from_len == 0) {
2764 /* insert the 'to' string everywhere. */
2765 /* >>> "Python".replace("", ".") */
2766 /* '.P.y.t.h.o.n.' */
2767 return replace_interleave(self, to_s, to_len, maxcount);
2768 }
2769
2770 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2771 /* point for an empty self string to generate a non-empty string */
2772 /* Special case so the remaining code always gets a non-empty string */
2773 if (PyString_GET_SIZE(self) == 0) {
2774 return return_self(self);
2775 }
2776
2777 if (to_len == 0) {
2778 /* delete all occurrences of 'from' string */
2779 if (from_len == 1) {
2780 return replace_delete_single_character(
2781 self, from_s[0], maxcount);
2782 } else {
2783 return replace_delete_substring(self, from_s, from_len, maxcount);
2784 }
2785 }
2786
2787 /* Handle special case where both strings have the same length */
2788
2789 if (from_len == to_len) {
2790 if (from_len == 1) {
2791 return replace_single_character_in_place(
2792 self,
2793 from_s[0],
2794 to_s[0],
2795 maxcount);
2796 } else {
2797 return replace_substring_in_place(
2798 self, from_s, from_len, to_s, to_len, maxcount);
2799 }
2800 }
2801
2802 /* Otherwise use the more generic algorithms */
2803 if (from_len == 1) {
2804 return replace_single_character(self, from_s[0],
2805 to_s, to_len, maxcount);
2806 } else {
2807 /* len('from')>=2, len('to')>=1 */
2808 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2809 }
2810 }
2811
2812 PyDoc_STRVAR(replace__doc__,
2813 "S.replace(old, new[, count]) -> string\n\
2814 \n\
2815 Return a copy of string S with all occurrences of substring\n\
2816 old replaced by new. If the optional argument count is\n\
2817 given, only the first count occurrences are replaced.");
2818
2819 static PyObject *
string_replace(PyStringObject * self,PyObject * args)2820 string_replace(PyStringObject *self, PyObject *args)
2821 {
2822 Py_ssize_t count = -1;
2823 PyObject *from, *to;
2824 const char *from_s, *to_s;
2825 Py_ssize_t from_len, to_len;
2826
2827 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2828 return NULL;
2829
2830 if (PyString_Check(from)) {
2831 from_s = PyString_AS_STRING(from);
2832 from_len = PyString_GET_SIZE(from);
2833 }
2834 #ifdef Py_USING_UNICODE
2835 if (PyUnicode_Check(from))
2836 return PyUnicode_Replace((PyObject *)self,
2837 from, to, count);
2838 #endif
2839 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2840 return NULL;
2841
2842 if (PyString_Check(to)) {
2843 to_s = PyString_AS_STRING(to);
2844 to_len = PyString_GET_SIZE(to);
2845 }
2846 #ifdef Py_USING_UNICODE
2847 else if (PyUnicode_Check(to))
2848 return PyUnicode_Replace((PyObject *)self,
2849 from, to, count);
2850 #endif
2851 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2852 return NULL;
2853
2854 return (PyObject *)replace((PyStringObject *) self,
2855 from_s, from_len,
2856 to_s, to_len, count);
2857 }
2858
2859 /** End DALKE **/
2860
2861 /* Matches the end (direction >= 0) or start (direction < 0) of self
2862 * against substr, using the start and end arguments. Returns
2863 * -1 on error, 0 if not found and 1 if found.
2864 */
2865 Py_LOCAL(int)
_string_tailmatch(PyStringObject * self,PyObject * substr,Py_ssize_t start,Py_ssize_t end,int direction)2866 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2867 Py_ssize_t end, int direction)
2868 {
2869 Py_ssize_t len = PyString_GET_SIZE(self);
2870 Py_ssize_t slen;
2871 const char* sub;
2872 const char* str;
2873
2874 if (PyString_Check(substr)) {
2875 sub = PyString_AS_STRING(substr);
2876 slen = PyString_GET_SIZE(substr);
2877 }
2878 #ifdef Py_USING_UNICODE
2879 else if (PyUnicode_Check(substr))
2880 return PyUnicode_Tailmatch((PyObject *)self,
2881 substr, start, end, direction);
2882 #endif
2883 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2884 return -1;
2885 str = PyString_AS_STRING(self);
2886
2887 ADJUST_INDICES(start, end, len);
2888
2889 if (direction < 0) {
2890 /* startswith */
2891 if (start+slen > len)
2892 return 0;
2893 } else {
2894 /* endswith */
2895 if (end-start < slen || start > len)
2896 return 0;
2897
2898 if (end-slen > start)
2899 start = end - slen;
2900 }
2901 if (end-start >= slen)
2902 return ! memcmp(str+start, sub, slen);
2903 return 0;
2904 }
2905
2906
2907 PyDoc_STRVAR(startswith__doc__,
2908 "S.startswith(prefix[, start[, end]]) -> bool\n\
2909 \n\
2910 Return True if S starts with the specified prefix, False otherwise.\n\
2911 With optional start, test S beginning at that position.\n\
2912 With optional end, stop comparing S at that position.\n\
2913 prefix can also be a tuple of strings to try.");
2914
2915 static PyObject *
string_startswith(PyStringObject * self,PyObject * args)2916 string_startswith(PyStringObject *self, PyObject *args)
2917 {
2918 Py_ssize_t start = 0;
2919 Py_ssize_t end = PY_SSIZE_T_MAX;
2920 PyObject *subobj;
2921 int result;
2922
2923 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
2924 return NULL;
2925 if (PyTuple_Check(subobj)) {
2926 Py_ssize_t i;
2927 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2928 result = _string_tailmatch(self,
2929 PyTuple_GET_ITEM(subobj, i),
2930 start, end, -1);
2931 if (result == -1)
2932 return NULL;
2933 else if (result) {
2934 Py_RETURN_TRUE;
2935 }
2936 }
2937 Py_RETURN_FALSE;
2938 }
2939 result = _string_tailmatch(self, subobj, start, end, -1);
2940 if (result == -1) {
2941 if (PyErr_ExceptionMatches(PyExc_TypeError))
2942 PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "
2943 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
2944 return NULL;
2945 }
2946 else
2947 return PyBool_FromLong(result);
2948 }
2949
2950
2951 PyDoc_STRVAR(endswith__doc__,
2952 "S.endswith(suffix[, start[, end]]) -> bool\n\
2953 \n\
2954 Return True if S ends with the specified suffix, False otherwise.\n\
2955 With optional start, test S beginning at that position.\n\
2956 With optional end, stop comparing S at that position.\n\
2957 suffix can also be a tuple of strings to try.");
2958
2959 static PyObject *
string_endswith(PyStringObject * self,PyObject * args)2960 string_endswith(PyStringObject *self, PyObject *args)
2961 {
2962 Py_ssize_t start = 0;
2963 Py_ssize_t end = PY_SSIZE_T_MAX;
2964 PyObject *subobj;
2965 int result;
2966
2967 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
2968 return NULL;
2969 if (PyTuple_Check(subobj)) {
2970 Py_ssize_t i;
2971 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2972 result = _string_tailmatch(self,
2973 PyTuple_GET_ITEM(subobj, i),
2974 start, end, +1);
2975 if (result == -1)
2976 return NULL;
2977 else if (result) {
2978 Py_RETURN_TRUE;
2979 }
2980 }
2981 Py_RETURN_FALSE;
2982 }
2983 result = _string_tailmatch(self, subobj, start, end, +1);
2984 if (result == -1) {
2985 if (PyErr_ExceptionMatches(PyExc_TypeError))
2986 PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "
2987 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
2988 return NULL;
2989 }
2990 else
2991 return PyBool_FromLong(result);
2992 }
2993
2994
2995 PyDoc_STRVAR(encode__doc__,
2996 "S.encode([encoding[,errors]]) -> object\n\
2997 \n\
2998 Encodes S using the codec registered for encoding. encoding defaults\n\
2999 to the default encoding. errors may be given to set a different error\n\
3000 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3001 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3002 'xmlcharrefreplace' as well as any other name registered with\n\
3003 codecs.register_error that is able to handle UnicodeEncodeErrors.");
3004
3005 static PyObject *
string_encode(PyStringObject * self,PyObject * args,PyObject * kwargs)3006 string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
3007 {
3008 static char *kwlist[] = {"encoding", "errors", 0};
3009 char *encoding = NULL;
3010 char *errors = NULL;
3011 PyObject *v;
3012
3013 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
3014 kwlist, &encoding, &errors))
3015 return NULL;
3016 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3017 if (v == NULL)
3018 goto onError;
3019 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3020 PyErr_Format(PyExc_TypeError,
3021 "encoder did not return a string/unicode object "
3022 "(type=%.400s)",
3023 Py_TYPE(v)->tp_name);
3024 Py_DECREF(v);
3025 return NULL;
3026 }
3027 return v;
3028
3029 onError:
3030 return NULL;
3031 }
3032
3033
3034 PyDoc_STRVAR(decode__doc__,
3035 "S.decode([encoding[,errors]]) -> object\n\
3036 \n\
3037 Decodes S using the codec registered for encoding. encoding defaults\n\
3038 to the default encoding. errors may be given to set a different error\n\
3039 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3040 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3041 as well as any other name registered with codecs.register_error that is\n\
3042 able to handle UnicodeDecodeErrors.");
3043
3044 static PyObject *
string_decode(PyStringObject * self,PyObject * args,PyObject * kwargs)3045 string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
3046 {
3047 static char *kwlist[] = {"encoding", "errors", 0};
3048 char *encoding = NULL;
3049 char *errors = NULL;
3050 PyObject *v;
3051
3052 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
3053 kwlist, &encoding, &errors))
3054 return NULL;
3055 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3056 if (v == NULL)
3057 goto onError;
3058 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3059 PyErr_Format(PyExc_TypeError,
3060 "decoder did not return a string/unicode object "
3061 "(type=%.400s)",
3062 Py_TYPE(v)->tp_name);
3063 Py_DECREF(v);
3064 return NULL;
3065 }
3066 return v;
3067
3068 onError:
3069 return NULL;
3070 }
3071
3072
3073 PyDoc_STRVAR(expandtabs__doc__,
3074 "S.expandtabs([tabsize]) -> string\n\
3075 \n\
3076 Return a copy of S where all tab characters are expanded using spaces.\n\
3077 If tabsize is not given, a tab size of 8 characters is assumed.");
3078
3079 static PyObject*
string_expandtabs(PyStringObject * self,PyObject * args)3080 string_expandtabs(PyStringObject *self, PyObject *args)
3081 {
3082 const char *e, *p, *qe;
3083 char *q;
3084 Py_ssize_t i, j, incr;
3085 PyObject *u;
3086 int tabsize = 8;
3087
3088 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3089 return NULL;
3090
3091 /* First pass: determine size of output string */
3092 i = 0; /* chars up to and including most recent \n or \r */
3093 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3094 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3095 for (p = PyString_AS_STRING(self); p < e; p++) {
3096 if (*p == '\t') {
3097 if (tabsize > 0) {
3098 incr = tabsize - (j % tabsize);
3099 if (j > PY_SSIZE_T_MAX - incr)
3100 goto overflow1;
3101 j += incr;
3102 }
3103 }
3104 else {
3105 if (j > PY_SSIZE_T_MAX - 1)
3106 goto overflow1;
3107 j++;
3108 if (*p == '\n' || *p == '\r') {
3109 if (i > PY_SSIZE_T_MAX - j)
3110 goto overflow1;
3111 i += j;
3112 j = 0;
3113 }
3114 }
3115 }
3116
3117 if (i > PY_SSIZE_T_MAX - j)
3118 goto overflow1;
3119
3120 /* Second pass: create output string and fill it */
3121 u = PyString_FromStringAndSize(NULL, i + j);
3122 if (!u)
3123 return NULL;
3124
3125 j = 0; /* same as in first pass */
3126 q = PyString_AS_STRING(u); /* next output char */
3127 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
3128
3129 for (p = PyString_AS_STRING(self); p < e; p++) {
3130 if (*p == '\t') {
3131 if (tabsize > 0) {
3132 i = tabsize - (j % tabsize);
3133 j += i;
3134 while (i--) {
3135 if (q >= qe)
3136 goto overflow2;
3137 *q++ = ' ';
3138 }
3139 }
3140 }
3141 else {
3142 if (q >= qe)
3143 goto overflow2;
3144 *q++ = *p;
3145 j++;
3146 if (*p == '\n' || *p == '\r')
3147 j = 0;
3148 }
3149 }
3150
3151 return u;
3152
3153 overflow2:
3154 Py_DECREF(u);
3155 overflow1:
3156 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3157 return NULL;
3158 }
3159
3160 Py_LOCAL_INLINE(PyObject *)
pad(PyStringObject * self,Py_ssize_t left,Py_ssize_t right,char fill)3161 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3162 {
3163 PyObject *u;
3164
3165 if (left < 0)
3166 left = 0;
3167 if (right < 0)
3168 right = 0;
3169
3170 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3171 Py_INCREF(self);
3172 return (PyObject *)self;
3173 }
3174
3175 u = PyString_FromStringAndSize(NULL,
3176 left + PyString_GET_SIZE(self) + right);
3177 if (u) {
3178 if (left)
3179 memset(PyString_AS_STRING(u), fill, left);
3180 Py_MEMCPY(PyString_AS_STRING(u) + left,
3181 PyString_AS_STRING(self),
3182 PyString_GET_SIZE(self));
3183 if (right)
3184 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3185 fill, right);
3186 }
3187
3188 return u;
3189 }
3190
3191 PyDoc_STRVAR(ljust__doc__,
3192 "S.ljust(width[, fillchar]) -> string\n"
3193 "\n"
3194 "Return S left-justified in a string of length width. Padding is\n"
3195 "done using the specified fill character (default is a space).");
3196
3197 static PyObject *
string_ljust(PyStringObject * self,PyObject * args)3198 string_ljust(PyStringObject *self, PyObject *args)
3199 {
3200 Py_ssize_t width;
3201 char fillchar = ' ';
3202
3203 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3204 return NULL;
3205
3206 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3207 Py_INCREF(self);
3208 return (PyObject*) self;
3209 }
3210
3211 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3212 }
3213
3214
3215 PyDoc_STRVAR(rjust__doc__,
3216 "S.rjust(width[, fillchar]) -> string\n"
3217 "\n"
3218 "Return S right-justified in a string of length width. Padding is\n"
3219 "done using the specified fill character (default is a space)");
3220
3221 static PyObject *
string_rjust(PyStringObject * self,PyObject * args)3222 string_rjust(PyStringObject *self, PyObject *args)
3223 {
3224 Py_ssize_t width;
3225 char fillchar = ' ';
3226
3227 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3228 return NULL;
3229
3230 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3231 Py_INCREF(self);
3232 return (PyObject*) self;
3233 }
3234
3235 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3236 }
3237
3238
3239 PyDoc_STRVAR(center__doc__,
3240 "S.center(width[, fillchar]) -> string\n"
3241 "\n"
3242 "Return S centered in a string of length width. Padding is\n"
3243 "done using the specified fill character (default is a space)");
3244
3245 static PyObject *
string_center(PyStringObject * self,PyObject * args)3246 string_center(PyStringObject *self, PyObject *args)
3247 {
3248 Py_ssize_t marg, left;
3249 Py_ssize_t width;
3250 char fillchar = ' ';
3251
3252 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3253 return NULL;
3254
3255 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3256 Py_INCREF(self);
3257 return (PyObject*) self;
3258 }
3259
3260 marg = width - PyString_GET_SIZE(self);
3261 left = marg / 2 + (marg & width & 1);
3262
3263 return pad(self, left, marg - left, fillchar);
3264 }
3265
3266 PyDoc_STRVAR(zfill__doc__,
3267 "S.zfill(width) -> string\n"
3268 "\n"
3269 "Pad a numeric string S with zeros on the left, to fill a field\n"
3270 "of the specified width. The string S is never truncated.");
3271
3272 static PyObject *
string_zfill(PyStringObject * self,PyObject * args)3273 string_zfill(PyStringObject *self, PyObject *args)
3274 {
3275 Py_ssize_t fill;
3276 PyObject *s;
3277 char *p;
3278 Py_ssize_t width;
3279
3280 if (!PyArg_ParseTuple(args, "n:zfill", &width))
3281 return NULL;
3282
3283 if (PyString_GET_SIZE(self) >= width) {
3284 if (PyString_CheckExact(self)) {
3285 Py_INCREF(self);
3286 return (PyObject*) self;
3287 }
3288 else
3289 return PyString_FromStringAndSize(
3290 PyString_AS_STRING(self),
3291 PyString_GET_SIZE(self)
3292 );
3293 }
3294
3295 fill = width - PyString_GET_SIZE(self);
3296
3297 s = pad(self, fill, 0, '0');
3298
3299 if (s == NULL)
3300 return NULL;
3301
3302 p = PyString_AS_STRING(s);
3303 if (p[fill] == '+' || p[fill] == '-') {
3304 /* move sign to beginning of string */
3305 p[0] = p[fill];
3306 p[fill] = '0';
3307 }
3308
3309 return (PyObject*) s;
3310 }
3311
3312 PyDoc_STRVAR(isspace__doc__,
3313 "S.isspace() -> bool\n\
3314 \n\
3315 Return True if all characters in S are whitespace\n\
3316 and there is at least one character in S, False otherwise.");
3317
3318 static PyObject*
string_isspace(PyStringObject * self)3319 string_isspace(PyStringObject *self)
3320 {
3321 register const unsigned char *p
3322 = (unsigned char *) PyString_AS_STRING(self);
3323 register const unsigned char *e;
3324
3325 /* Shortcut for single character strings */
3326 if (PyString_GET_SIZE(self) == 1 &&
3327 isspace(*p))
3328 return PyBool_FromLong(1);
3329
3330 /* Special case for empty strings */
3331 if (PyString_GET_SIZE(self) == 0)
3332 return PyBool_FromLong(0);
3333
3334 e = p + PyString_GET_SIZE(self);
3335 for (; p < e; p++) {
3336 if (!isspace(*p))
3337 return PyBool_FromLong(0);
3338 }
3339 return PyBool_FromLong(1);
3340 }
3341
3342
3343 PyDoc_STRVAR(isalpha__doc__,
3344 "S.isalpha() -> bool\n\
3345 \n\
3346 Return True if all characters in S are alphabetic\n\
3347 and there is at least one character in S, False otherwise.");
3348
3349 static PyObject*
string_isalpha(PyStringObject * self)3350 string_isalpha(PyStringObject *self)
3351 {
3352 register const unsigned char *p
3353 = (unsigned char *) PyString_AS_STRING(self);
3354 register const unsigned char *e;
3355
3356 /* Shortcut for single character strings */
3357 if (PyString_GET_SIZE(self) == 1 &&
3358 isalpha(*p))
3359 return PyBool_FromLong(1);
3360
3361 /* Special case for empty strings */
3362 if (PyString_GET_SIZE(self) == 0)
3363 return PyBool_FromLong(0);
3364
3365 e = p + PyString_GET_SIZE(self);
3366 for (; p < e; p++) {
3367 if (!isalpha(*p))
3368 return PyBool_FromLong(0);
3369 }
3370 return PyBool_FromLong(1);
3371 }
3372
3373
3374 PyDoc_STRVAR(isalnum__doc__,
3375 "S.isalnum() -> bool\n\
3376 \n\
3377 Return True if all characters in S are alphanumeric\n\
3378 and there is at least one character in S, False otherwise.");
3379
3380 static PyObject*
string_isalnum(PyStringObject * self)3381 string_isalnum(PyStringObject *self)
3382 {
3383 register const unsigned char *p
3384 = (unsigned char *) PyString_AS_STRING(self);
3385 register const unsigned char *e;
3386
3387 /* Shortcut for single character strings */
3388 if (PyString_GET_SIZE(self) == 1 &&
3389 isalnum(*p))
3390 return PyBool_FromLong(1);
3391
3392 /* Special case for empty strings */
3393 if (PyString_GET_SIZE(self) == 0)
3394 return PyBool_FromLong(0);
3395
3396 e = p + PyString_GET_SIZE(self);
3397 for (; p < e; p++) {
3398 if (!isalnum(*p))
3399 return PyBool_FromLong(0);
3400 }
3401 return PyBool_FromLong(1);
3402 }
3403
3404
3405 PyDoc_STRVAR(isdigit__doc__,
3406 "S.isdigit() -> bool\n\
3407 \n\
3408 Return True if all characters in S are digits\n\
3409 and there is at least one character in S, False otherwise.");
3410
3411 static PyObject*
string_isdigit(PyStringObject * self)3412 string_isdigit(PyStringObject *self)
3413 {
3414 register const unsigned char *p
3415 = (unsigned char *) PyString_AS_STRING(self);
3416 register const unsigned char *e;
3417
3418 /* Shortcut for single character strings */
3419 if (PyString_GET_SIZE(self) == 1 &&
3420 isdigit(*p))
3421 return PyBool_FromLong(1);
3422
3423 /* Special case for empty strings */
3424 if (PyString_GET_SIZE(self) == 0)
3425 return PyBool_FromLong(0);
3426
3427 e = p + PyString_GET_SIZE(self);
3428 for (; p < e; p++) {
3429 if (!isdigit(*p))
3430 return PyBool_FromLong(0);
3431 }
3432 return PyBool_FromLong(1);
3433 }
3434
3435
3436 PyDoc_STRVAR(islower__doc__,
3437 "S.islower() -> bool\n\
3438 \n\
3439 Return True if all cased characters in S are lowercase and there is\n\
3440 at least one cased character in S, False otherwise.");
3441
3442 static PyObject*
string_islower(PyStringObject * self)3443 string_islower(PyStringObject *self)
3444 {
3445 register const unsigned char *p
3446 = (unsigned char *) PyString_AS_STRING(self);
3447 register const unsigned char *e;
3448 int cased;
3449
3450 /* Shortcut for single character strings */
3451 if (PyString_GET_SIZE(self) == 1)
3452 return PyBool_FromLong(islower(*p) != 0);
3453
3454 /* Special case for empty strings */
3455 if (PyString_GET_SIZE(self) == 0)
3456 return PyBool_FromLong(0);
3457
3458 e = p + PyString_GET_SIZE(self);
3459 cased = 0;
3460 for (; p < e; p++) {
3461 if (isupper(*p))
3462 return PyBool_FromLong(0);
3463 else if (!cased && islower(*p))
3464 cased = 1;
3465 }
3466 return PyBool_FromLong(cased);
3467 }
3468
3469
3470 PyDoc_STRVAR(isupper__doc__,
3471 "S.isupper() -> bool\n\
3472 \n\
3473 Return True if all cased characters in S are uppercase and there is\n\
3474 at least one cased character in S, False otherwise.");
3475
3476 static PyObject*
string_isupper(PyStringObject * self)3477 string_isupper(PyStringObject *self)
3478 {
3479 register const unsigned char *p
3480 = (unsigned char *) PyString_AS_STRING(self);
3481 register const unsigned char *e;
3482 int cased;
3483
3484 /* Shortcut for single character strings */
3485 if (PyString_GET_SIZE(self) == 1)
3486 return PyBool_FromLong(isupper(*p) != 0);
3487
3488 /* Special case for empty strings */
3489 if (PyString_GET_SIZE(self) == 0)
3490 return PyBool_FromLong(0);
3491
3492 e = p + PyString_GET_SIZE(self);
3493 cased = 0;
3494 for (; p < e; p++) {
3495 if (islower(*p))
3496 return PyBool_FromLong(0);
3497 else if (!cased && isupper(*p))
3498 cased = 1;
3499 }
3500 return PyBool_FromLong(cased);
3501 }
3502
3503
3504 PyDoc_STRVAR(istitle__doc__,
3505 "S.istitle() -> bool\n\
3506 \n\
3507 Return True if S is a titlecased string and there is at least one\n\
3508 character in S, i.e. uppercase characters may only follow uncased\n\
3509 characters and lowercase characters only cased ones. Return False\n\
3510 otherwise.");
3511
3512 static PyObject*
string_istitle(PyStringObject * self,PyObject * uncased)3513 string_istitle(PyStringObject *self, PyObject *uncased)
3514 {
3515 register const unsigned char *p
3516 = (unsigned char *) PyString_AS_STRING(self);
3517 register const unsigned char *e;
3518 int cased, previous_is_cased;
3519
3520 /* Shortcut for single character strings */
3521 if (PyString_GET_SIZE(self) == 1)
3522 return PyBool_FromLong(isupper(*p) != 0);
3523
3524 /* Special case for empty strings */
3525 if (PyString_GET_SIZE(self) == 0)
3526 return PyBool_FromLong(0);
3527
3528 e = p + PyString_GET_SIZE(self);
3529 cased = 0;
3530 previous_is_cased = 0;
3531 for (; p < e; p++) {
3532 register const unsigned char ch = *p;
3533
3534 if (isupper(ch)) {
3535 if (previous_is_cased)
3536 return PyBool_FromLong(0);
3537 previous_is_cased = 1;
3538 cased = 1;
3539 }
3540 else if (islower(ch)) {
3541 if (!previous_is_cased)
3542 return PyBool_FromLong(0);
3543 previous_is_cased = 1;
3544 cased = 1;
3545 }
3546 else
3547 previous_is_cased = 0;
3548 }
3549 return PyBool_FromLong(cased);
3550 }
3551
3552
3553 PyDoc_STRVAR(splitlines__doc__,
3554 "S.splitlines(keepends=False) -> list of strings\n\
3555 \n\
3556 Return a list of the lines in S, breaking at line boundaries.\n\
3557 Line breaks are not included in the resulting list unless keepends\n\
3558 is given and true.");
3559
3560 static PyObject*
string_splitlines(PyStringObject * self,PyObject * args)3561 string_splitlines(PyStringObject *self, PyObject *args)
3562 {
3563 int keepends = 0;
3564
3565 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3566 return NULL;
3567
3568 return stringlib_splitlines(
3569 (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3570 keepends
3571 );
3572 }
3573
3574 PyDoc_STRVAR(sizeof__doc__,
3575 "S.__sizeof__() -> size of S in memory, in bytes");
3576
3577 static PyObject *
string_sizeof(PyStringObject * v)3578 string_sizeof(PyStringObject *v)
3579 {
3580 Py_ssize_t res;
3581 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3582 return PyInt_FromSsize_t(res);
3583 }
3584
3585 static PyObject *
string_getnewargs(PyStringObject * v)3586 string_getnewargs(PyStringObject *v)
3587 {
3588 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
3589 }
3590
3591
3592 #include "stringlib/string_format.h"
3593
/* Docstring for the "format" method; the string_methods table attaches it
   to do_string_format. */
PyDoc_STRVAR(format__doc__,
"S.format(*args, **kwargs) -> string\n\
\n\
Return a formatted version of S, using substitutions from args and kwargs.\n\
The substitutions are identified by braces ('{' and '}').");
3599
3600 static PyObject *
string__format__(PyObject * self,PyObject * args)3601 string__format__(PyObject* self, PyObject* args)
3602 {
3603 PyObject *format_spec;
3604 PyObject *result = NULL;
3605 PyObject *tmp = NULL;
3606
3607 /* If 2.x, convert format_spec to the same type as value */
3608 /* This is to allow things like u''.format('') */
3609 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3610 goto done;
3611 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
3612 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3613 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3614 goto done;
3615 }
3616 tmp = PyObject_Str(format_spec);
3617 if (tmp == NULL)
3618 goto done;
3619 format_spec = tmp;
3620
3621 result = _PyBytes_FormatAdvanced(self,
3622 PyString_AS_STRING(format_spec),
3623 PyString_GET_SIZE(format_spec));
3624 done:
3625 Py_XDECREF(tmp);
3626 return result;
3627 }
3628
3629 PyDoc_STRVAR(p_format__doc__,
3630 "S.__format__(format_spec) -> string\n\
3631 \n\
3632 Return a formatted version of S as described by format_spec.");
3633
3634
/* Method table for the str type (installed as tp_methods of PyString_Type).
   Each entry gives the Python-level name, the C implementation, the calling
   convention flags and, where present, the docstring.  Entries with only
   three initializers have no docstring.  The table ends with a NULL
   sentinel entry. */
static PyMethodDef
string_methods[] = {
    /* Counterparts of the obsolete stropmodule functions; except
       string.maketrans(). */
    {"join", (PyCFunction)string_join, METH_O, join__doc__},
    {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
    {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
    {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
    {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
    {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
    {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
    {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
    {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
    {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
    {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
    {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
    {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
     capitalize__doc__},
    {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
    {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
     endswith__doc__},
    {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
    {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
    {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
    {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
    {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
    {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
    {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
    {"rpartition", (PyCFunction)string_rpartition, METH_O,
     rpartition__doc__},
    {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
     startswith__doc__},
    {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
     swapcase__doc__},
    {"translate", (PyCFunction)string_translate, METH_VARARGS,
     translate__doc__},
    {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
    {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
    {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
    {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
    {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
    /* New-style formatting support. */
    {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
    {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
    {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
    {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
    {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
    {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
    {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
     splitlines__doc__},
    {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
     sizeof__doc__},
    {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
    {NULL, NULL} /* sentinel */
};
3693
3694 static PyObject *
3695 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3696
3697 static PyObject *
string_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3698 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3699 {
3700 PyObject *x = NULL;
3701 static char *kwlist[] = {"object", 0};
3702
3703 if (type != &PyString_Type)
3704 return str_subtype_new(type, args, kwds);
3705 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3706 return NULL;
3707 if (x == NULL)
3708 return PyString_FromString("");
3709 return PyObject_Str(x);
3710 }
3711
3712 static PyObject *
str_subtype_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3713 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3714 {
3715 PyObject *tmp, *pnew;
3716 Py_ssize_t n;
3717
3718 assert(PyType_IsSubtype(type, &PyString_Type));
3719 tmp = string_new(&PyString_Type, args, kwds);
3720 if (tmp == NULL)
3721 return NULL;
3722 assert(PyString_Check(tmp));
3723 n = PyString_GET_SIZE(tmp);
3724 pnew = type->tp_alloc(type, n);
3725 if (pnew != NULL) {
3726 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3727 ((PyStringObject *)pnew)->ob_shash =
3728 ((PyStringObject *)tmp)->ob_shash;
3729 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3730 }
3731 Py_DECREF(tmp);
3732 return pnew;
3733 }
3734
3735 static PyObject *
basestring_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3736 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3737 {
3738 PyErr_SetString(PyExc_TypeError,
3739 "The basestring type cannot be instantiated");
3740 return NULL;
3741 }
3742
3743 static PyObject *
string_mod(PyObject * v,PyObject * w)3744 string_mod(PyObject *v, PyObject *w)
3745 {
3746 if (!PyString_Check(v)) {
3747 Py_INCREF(Py_NotImplemented);
3748 return Py_NotImplemented;
3749 }
3750 return PyString_Format(v, w);
3751 }
3752
/* Docstring used as tp_doc of PyBaseString_Type. */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");
3755
/* Number protocol for str: only the remainder slot is filled in, which
   routes the `%` operator to string_mod.  All members after nb_remainder
   are left out of the initializer and are therefore zero. */
static PyNumberMethods string_as_number = {
    0, /*nb_add*/
    0, /*nb_subtract*/
    0, /*nb_multiply*/
    0, /*nb_divide*/
    string_mod, /*nb_remainder*/
};
3763
3764
/* Type object for `basestring`.  It derives from object, may itself be
   subclassed (Py_TPFLAGS_BASETYPE), and its tp_new (basestring_new) always
   fails, so it cannot be instantiated directly.  PyString_Type names it as
   its tp_base. */
PyTypeObject PyBaseString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "basestring", /* tp_name */
    0, /* tp_basicsize */
    0, /* tp_itemsize */
    0, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_compare */
    0, /* tp_repr */
    0, /* tp_as_number */
    0, /* tp_as_sequence */
    0, /* tp_as_mapping */
    0, /* tp_hash */
    0, /* tp_call */
    0, /* tp_str */
    0, /* tp_getattro */
    0, /* tp_setattro */
    0, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    basestring_doc, /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    0, /* tp_iter */
    0, /* tp_iternext */
    0, /* tp_methods */
    0, /* tp_members */
    0, /* tp_getset */
    &PyBaseObject_Type, /* tp_base */
    0, /* tp_dict */
    0, /* tp_descr_get */
    0, /* tp_descr_set */
    0, /* tp_dictoffset */
    0, /* tp_init */
    0, /* tp_alloc */
    basestring_new, /* tp_new */
    0, /* tp_free */
};
3806
/* Docstring for the str type (tp_doc of PyString_Type). */
PyDoc_STRVAR(string_doc,
"str(object='') -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");

/* Type object for `str`.  Instances are variable-sized: the basic size is
   PyStringObject_SIZE and each item is one char.  The type derives from
   basestring, wires up the number/sequence/mapping/buffer protocols, and
   exposes the methods listed in string_methods. */
PyTypeObject PyString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "str", /* tp_name */
    PyStringObject_SIZE, /* tp_basicsize */
    sizeof(char), /* tp_itemsize */
    string_dealloc, /* tp_dealloc */
    (printfunc)string_print, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_compare */
    string_repr, /* tp_repr */
    &string_as_number, /* tp_as_number */
    &string_as_sequence, /* tp_as_sequence */
    &string_as_mapping, /* tp_as_mapping */
    (hashfunc)string_hash, /* tp_hash */
    0, /* tp_call */
    string_str, /* tp_str */
    PyObject_GenericGetAttr, /* tp_getattro */
    0, /* tp_setattro */
    &string_as_buffer, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
        Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
        Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
    string_doc, /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    (richcmpfunc)string_richcompare, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    0, /* tp_iter */
    0, /* tp_iternext */
    string_methods, /* tp_methods */
    0, /* tp_members */
    0, /* tp_getset */
    &PyBaseString_Type, /* tp_base */
    0, /* tp_dict */
    0, /* tp_descr_get */
    0, /* tp_descr_set */
    0, /* tp_dictoffset */
    0, /* tp_init */
    0, /* tp_alloc */
    string_new, /* tp_new */
    PyObject_Del, /* tp_free */
};
3856
3857 void
PyString_Concat(register PyObject ** pv,register PyObject * w)3858 PyString_Concat(register PyObject **pv, register PyObject *w)
3859 {
3860 register PyObject *v;
3861 if (*pv == NULL)
3862 return;
3863 if (w == NULL || !PyString_Check(*pv)) {
3864 Py_CLEAR(*pv);
3865 return;
3866 }
3867 v = string_concat((PyStringObject *) *pv, w);
3868 Py_SETREF(*pv, v);
3869 }
3870
3871 void
PyString_ConcatAndDel(register PyObject ** pv,register PyObject * w)3872 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
3873 {
3874 PyString_Concat(pv, w);
3875 Py_XDECREF(w);
3876 }
3877
3878
/* The following function breaks the notion that strings are immutable:
   it changes the size of a string.  We get away with this only if there
   is only one module referencing the object.  You can also think of it
   as creating a new string object and destroying the old one, only
   more efficiently.  In any case, don't use this if the string may
   already be known to some other part of the code...
   Note that if there's not enough memory to resize the string, the original
   string object at *pv is deallocated, *pv is set to NULL, an "out of
   memory" exception is set, and -1 is returned.  Else (on success) 0 is
   returned, and the value in *pv may or may not be the same as on input.
   As always, an extra byte is allocated for a trailing \0 byte (newsize
   does *not* include that), and a trailing \0 byte is stored.

   If *pv is not a string, has a reference count other than 1, is interned,
   or newsize is negative, *pv is set to NULL, the reference is dropped,
   PyErr_BadInternalCall() is raised and -1 is returned.
*/

int
_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
{
    register PyObject *v;
    register PyStringObject *sv;
    v = *pv;
    /* Refuse anything that is not an exclusively owned, non-interned string. */
    if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
        PyString_CHECK_INTERNED(v)) {
        *pv = 0;
        Py_DECREF(v);
        PyErr_BadInternalCall();
        return -1;
    }
    /* XXX UNREF/NEWREF interface should be more symmetrical */
    /* Unregister the object before reallocating it; it is registered again
       with _Py_NewReference() once the new block is known. */
    _Py_DEC_REFTOTAL;
    _Py_ForgetReference(v);
    *pv = (PyObject *)
        PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
    if (*pv == NULL) {
        /* Reallocation failed: free the original block; *pv is already NULL. */
        PyObject_Del(v);
        PyErr_NoMemory();
        return -1;
    }
    _Py_NewReference(*pv);
    sv = (PyStringObject *) *pv;
    Py_SIZE(sv) = newsize;
    sv->ob_sval[newsize] = '\0';
    sv->ob_shash = -1;          /* invalidate cached hash value */
    return 0;
}
3923
3924 /* Helpers for formatstring */
3925
3926 Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject * args,Py_ssize_t arglen,Py_ssize_t * p_argidx)3927 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3928 {
3929 Py_ssize_t argidx = *p_argidx;
3930 if (argidx < arglen) {
3931 (*p_argidx)++;
3932 if (arglen < 0)
3933 return args;
3934 else
3935 return PyTuple_GetItem(args, argidx);
3936 }
3937 PyErr_SetString(PyExc_TypeError,
3938 "not enough arguments for format string");
3939 return NULL;
3940 }
3941
3942 /* Format codes
3943 * F_LJUST '-'
3944 * F_SIGN '+'
3945 * F_BLANK ' '
3946 * F_ALT '#'
3947 * F_ZERO '0'
3948 */
3949 #define F_LJUST (1<<0)
3950 #define F_SIGN (1<<1)
3951 #define F_BLANK (1<<2)
3952 #define F_ALT (1<<3)
3953 #define F_ZERO (1<<4)
3954
3955 /* Returns a new reference to a PyString object, or NULL on failure. */
3956
3957 static PyObject *
formatfloat(PyObject * v,int flags,int prec,int type)3958 formatfloat(PyObject *v, int flags, int prec, int type)
3959 {
3960 char *p;
3961 PyObject *result;
3962 double x;
3963
3964 x = PyFloat_AsDouble(v);
3965 if (x == -1.0 && PyErr_Occurred()) {
3966 PyErr_Format(PyExc_TypeError, "float argument required, "
3967 "not %.200s", Py_TYPE(v)->tp_name);
3968 return NULL;
3969 }
3970
3971 if (prec < 0)
3972 prec = 6;
3973
3974 p = PyOS_double_to_string(x, type, prec,
3975 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
3976
3977 if (p == NULL)
3978 return NULL;
3979 result = PyString_FromStringAndSize(p, strlen(p));
3980 PyMem_Free(p);
3981 return result;
3982 }
3983
/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
 * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
 * Python's regular ints.
 * Return value:  a new PyString*, or NULL if error.
 *  .  *pbuf is set to point into it,
 *     *plen set to the # of chars following that.
 *     Caller must decref it when done using pbuf.
 *     The string starting at *pbuf is of the form
 *         "-"? ("0x" | "0X")? digit+
 *     "0x"/"0X" are present only for x and X conversions, with F_ALT
 *         set in flags.  The case of hex digits will be correct,
 *     There will be at least prec digits, zero-filled on the left if
 *         necessary to get that many.
 * val          object to be converted
 * flags        bitmask of format flags; only F_ALT is looked at
 * prec         minimum number of digits; 0-fill on left if needed
 * type         a character in [duoxX]; u acts the same as d
 *
 * CAUTION:  o, x and X conversions on regular ints can never
 * produce a '-' sign, but can for Python's unbounded ints.
 */
PyObject*
_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
                     char **pbuf, int *plen)
{
    PyObject *result = NULL, *r1;
    const char *s;
    char *buf;
    Py_ssize_t i;
    int sign;           /* 1 if '-', else 0 */
    int len;            /* number of characters */
    Py_ssize_t llen;
    int numdigits;      /* len == numnondigits + skipped + numdigits */
    int numnondigits, skipped, filled;
    const char *method;

    /* Obtain the textual form through the type's str/oct/hex slot; `method`
       only names the slot for the error message below. */
    switch (type) {
    case 'd':
    case 'u':
        method = "str";
        result = Py_TYPE(val)->tp_str(val);
        break;
    case 'o':
        method = "oct";
        result = Py_TYPE(val)->tp_as_number->nb_oct(val);
        break;
    case 'x':
    case 'X':
        method = "hex";
        result = Py_TYPE(val)->tp_as_number->nb_hex(val);
        break;
    default:
        /* NOTE(review): with assertions compiled out this falls through
           with result == NULL, so NULL is returned below -- confirm that
           callers never pass another type code. */
        assert(!"'type' not in [duoxX]");
    }
    if (!result)
        return NULL;

    if (PyString_AsStringAndSize(result, (char **)&s, &llen) < 0) {
        Py_DECREF(result);
        return NULL;
    }
    /* The remaining arithmetic is done in int, so reject longer strings. */
    if (llen > INT_MAX) {
        PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
        Py_DECREF(result);
        return NULL;
    }
    len = (int)llen;
    /* Ignore a trailing 'L' suffix, if present. */
    if (len > 0 && s[len-1] == 'L') {
        --len;
        if (len == 0)
            goto error;
    }
    sign = s[0] == '-';
    numnondigits = sign;

    /* Need to skip 0x, 0X or 0. */
    skipped = 0;
    switch (type) {
    case 'o':
        if (s[sign] != '0')
            goto error;
        /* If 0 is only digit, leave it alone. */
        if ((flags & F_ALT) == 0 && len - sign > 1)
            skipped = 1;
        break;
    case 'x':
    case 'X':
        if (s[sign] != '0' || (s[sign + 1] != 'x' && s[sign + 1] != 'X'))
            goto error;
        /* Without F_ALT the prefix is dropped; with it, the two prefix
           characters are kept and counted as non-digits. */
        if ((flags & F_ALT) == 0)
            skipped = 2;
        else
            numnondigits += 2;
        break;
    }
    numdigits = len - numnondigits - skipped;
    if (numdigits <= 0)
        goto error;

    /* Number of '0' characters needed to reach prec digits. */
    filled = prec - numdigits;
    if (filled < 0)
        filled = 0;
    len = numnondigits + filled + numdigits;

    /* To modify the string in-place, there can only be one reference. */
    if (skipped >= filled &&
        PyString_CheckExact(result) &&
        Py_REFCNT(result) == 1 &&
        !PyString_CHECK_INTERNED(result))
    {
        /* Reuse result's own buffer: the skipped prefix leaves enough room
           for the zero fill, so the output starts (skipped - filled) bytes
           into the existing text. */
        r1 = NULL;
        buf = (char *)s + skipped - filled;
    }
    else {
        /* Build the output in a fresh string; r1 keeps the source text
           alive until it has been copied. */
        r1 = result;
        result = PyString_FromStringAndSize(NULL, len);
        if (!result) {
            Py_DECREF(r1);
            return NULL;
        }
        buf = PyString_AS_STRING(result);
    }

    /* Copy sign/prefix from the highest index down: in the in-place case
       buf is at or after s, so this order never overwrites unread bytes. */
    for (i = numnondigits; --i >= 0;)
        buf[i] = s[i];
    buf += numnondigits;
    s += numnondigits + skipped;
    for (i = 0; i < filled; i++)
        *buf++ = '0';
    if (r1 == NULL) {
        /* In place: the digits are already where they belong. */
        assert(buf == s);
        buf += numdigits;
    }
    else {
        for (i = 0; i < numdigits; i++)
            *buf++ = *s++;
    }
    *buf = '\0';
    buf -= len;         /* rewind to the start of the formatted text */
    Py_XDECREF(r1);

    /* Fix up case for hex conversions. */
    if (type == 'X') {
        /* Need to convert all lower case letters to upper case.
           and need to convert 0x to 0X (and -0x to -0X). */
        for (i = 0; i < len; i++) {
            if (buf[i] >= 'a' && buf[i] <= 'z')
                buf[i] -= 'a'-'A';
        }
    }
    *pbuf = buf;
    *plen = len;
    return result;

error:
    PyErr_Format(PyExc_ValueError,
                 "%%%c format: invalid result of __%s__ (type=%.200s)",
                 type, method, Py_TYPE(val)->tp_name);
    Py_DECREF(result);
    return NULL;
}
4145
4146 Py_LOCAL_INLINE(int)
formatint(char * buf,size_t buflen,int flags,int prec,int type,PyObject * v)4147 formatint(char *buf, size_t buflen, int flags,
4148 int prec, int type, PyObject *v)
4149 {
4150 /* fmt = '%#.' + `prec` + 'l' + `type`
4151 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4152 + 1 + 1 = 24 */
4153 char fmt[64]; /* plenty big enough! */
4154 char *sign;
4155 long x;
4156
4157 x = PyInt_AsLong(v);
4158 if (x == -1 && PyErr_Occurred()) {
4159 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4160 Py_TYPE(v)->tp_name);
4161 return -1;
4162 }
4163 if (x < 0 && type == 'u') {
4164 type = 'd';
4165 }
4166 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4167 sign = "-";
4168 else
4169 sign = "";
4170 if (prec < 0)
4171 prec = 1;
4172
4173 if ((flags & F_ALT) &&
4174 (type == 'x' || type == 'X')) {
4175 /* When converting under %#x or %#X, there are a number
4176 * of issues that cause pain:
4177 * - when 0 is being converted, the C standard leaves off
4178 * the '0x' or '0X', which is inconsistent with other
4179 * %#x/%#X conversions and inconsistent with Python's
4180 * hex() function
4181 * - there are platforms that violate the standard and
4182 * convert 0 with the '0x' or '0X'
4183 * (Metrowerks, Compaq Tru64)
4184 * - there are platforms that give '0x' when converting
4185 * under %#X, but convert 0 in accordance with the
4186 * standard (OS/2 EMX)
4187 *
4188 * We can achieve the desired consistency by inserting our
4189 * own '0x' or '0X' prefix, and substituting %x/%X in place
4190 * of %#x/%#X.
4191 *
4192 * Note that this is the same approach as used in
4193 * formatint() in unicodeobject.c
4194 */
4195 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4196 sign, type, prec, type);
4197 }
4198 else {
4199 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4200 sign, (flags&F_ALT) ? "#" : "",
4201 prec, type);
4202 }
4203
4204 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4205 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4206 */
4207 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4208 PyErr_SetString(PyExc_OverflowError,
4209 "formatted integer is too long (precision too large?)");
4210 return -1;
4211 }
4212 if (sign[0])
4213 PyOS_snprintf(buf, buflen, fmt, -x);
4214 else
4215 PyOS_snprintf(buf, buflen, fmt, x);
4216 return (int)strlen(buf);
4217 }
4218
4219 Py_LOCAL_INLINE(int)
formatchar(char * buf,size_t buflen,PyObject * v)4220 formatchar(char *buf, size_t buflen, PyObject *v)
4221 {
4222 /* presume that the buffer is at least 2 characters long */
4223 if (PyString_Check(v)) {
4224 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4225 return -1;
4226 }
4227 else {
4228 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4229 return -1;
4230 }
4231 buf[1] = '\0';
4232 return 1;
4233 }
4234
4235 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4236
4237 FORMATBUFLEN is the length of the buffer in which the ints &
4238 chars are formatted. XXX This is a magic number. Each formatting
4239 routine does bounds checking to ensure no overflow, but a better
4240 solution may be to malloc a buffer of appropriate size for each
4241 format. For now, the current solution is sufficient.
4242 */
4243 #define FORMATBUFLEN (size_t)120
4244
4245 PyObject *
PyString_Format(PyObject * format,PyObject * args)4246 PyString_Format(PyObject *format, PyObject *args)
4247 {
4248 char *fmt, *res;
4249 Py_ssize_t arglen, argidx;
4250 Py_ssize_t reslen, rescnt, fmtcnt;
4251 int args_owned = 0;
4252 PyObject *result, *orig_args;
4253 #ifdef Py_USING_UNICODE
4254 PyObject *v, *w;
4255 #endif
4256 PyObject *dict = NULL;
4257 if (format == NULL || !PyString_Check(format) || args == NULL) {
4258 PyErr_BadInternalCall();
4259 return NULL;
4260 }
4261 orig_args = args;
4262 fmt = PyString_AS_STRING(format);
4263 fmtcnt = PyString_GET_SIZE(format);
4264 reslen = rescnt = fmtcnt + 100;
4265 result = PyString_FromStringAndSize((char *)NULL, reslen);
4266 if (result == NULL)
4267 return NULL;
4268 res = PyString_AsString(result);
4269 if (PyTuple_Check(args)) {
4270 arglen = PyTuple_GET_SIZE(args);
4271 argidx = 0;
4272 }
4273 else {
4274 arglen = -1;
4275 argidx = -2;
4276 }
4277 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
4278 !PyTuple_Check(args) && !PyObject_TypeCheck(args, &PyBaseString_Type))
4279 dict = args;
4280 while (--fmtcnt >= 0) {
4281 if (*fmt != '%') {
4282 if (--rescnt < 0) {
4283 rescnt = fmtcnt + 100;
4284 reslen += rescnt;
4285 if (_PyString_Resize(&result, reslen))
4286 return NULL;
4287 res = PyString_AS_STRING(result)
4288 + reslen - rescnt;
4289 --rescnt;
4290 }
4291 *res++ = *fmt++;
4292 }
4293 else {
4294 /* Got a format specifier */
4295 int flags = 0;
4296 Py_ssize_t width = -1;
4297 int prec = -1;
4298 int c = '\0';
4299 int fill;
4300 int isnumok;
4301 PyObject *v = NULL;
4302 PyObject *temp = NULL;
4303 char *pbuf;
4304 int sign;
4305 Py_ssize_t len;
4306 char formatbuf[FORMATBUFLEN];
4307 /* For format{int,char}() */
4308 #ifdef Py_USING_UNICODE
4309 char *fmt_start = fmt;
4310 Py_ssize_t argidx_start = argidx;
4311 #endif
4312
4313 fmt++;
4314 if (*fmt == '(') {
4315 char *keystart;
4316 Py_ssize_t keylen;
4317 PyObject *key;
4318 int pcount = 1;
4319
4320 if (dict == NULL) {
4321 PyErr_SetString(PyExc_TypeError,
4322 "format requires a mapping");
4323 goto error;
4324 }
4325 ++fmt;
4326 --fmtcnt;
4327 keystart = fmt;
4328 /* Skip over balanced parentheses */
4329 while (pcount > 0 && --fmtcnt >= 0) {
4330 if (*fmt == ')')
4331 --pcount;
4332 else if (*fmt == '(')
4333 ++pcount;
4334 fmt++;
4335 }
4336 keylen = fmt - keystart - 1;
4337 if (fmtcnt < 0 || pcount > 0) {
4338 PyErr_SetString(PyExc_ValueError,
4339 "incomplete format key");
4340 goto error;
4341 }
4342 key = PyString_FromStringAndSize(keystart,
4343 keylen);
4344 if (key == NULL)
4345 goto error;
4346 if (args_owned) {
4347 Py_DECREF(args);
4348 args_owned = 0;
4349 }
4350 args = PyObject_GetItem(dict, key);
4351 Py_DECREF(key);
4352 if (args == NULL) {
4353 goto error;
4354 }
4355 args_owned = 1;
4356 arglen = -1;
4357 argidx = -2;
4358 }
4359 while (--fmtcnt >= 0) {
4360 switch (c = *fmt++) {
4361 case '-': flags |= F_LJUST; continue;
4362 case '+': flags |= F_SIGN; continue;
4363 case ' ': flags |= F_BLANK; continue;
4364 case '#': flags |= F_ALT; continue;
4365 case '0': flags |= F_ZERO; continue;
4366 }
4367 break;
4368 }
4369 if (c == '*') {
4370 v = getnextarg(args, arglen, &argidx);
4371 if (v == NULL)
4372 goto error;
4373 if (!PyInt_Check(v)) {
4374 PyErr_SetString(PyExc_TypeError,
4375 "* wants int");
4376 goto error;
4377 }
4378 width = PyInt_AsSsize_t(v);
4379 if (width == -1 && PyErr_Occurred())
4380 goto error;
4381 if (width < 0) {
4382 flags |= F_LJUST;
4383 width = -width;
4384 }
4385 if (--fmtcnt >= 0)
4386 c = *fmt++;
4387 }
4388 else if (c >= 0 && isdigit(c)) {
4389 width = c - '0';
4390 while (--fmtcnt >= 0) {
4391 c = Py_CHARMASK(*fmt++);
4392 if (!isdigit(c))
4393 break;
4394 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
4395 PyErr_SetString(
4396 PyExc_ValueError,
4397 "width too big");
4398 goto error;
4399 }
4400 width = width*10 + (c - '0');
4401 }
4402 }
4403 if (c == '.') {
4404 prec = 0;
4405 if (--fmtcnt >= 0)
4406 c = *fmt++;
4407 if (c == '*') {
4408 v = getnextarg(args, arglen, &argidx);
4409 if (v == NULL)
4410 goto error;
4411 if (!PyInt_Check(v)) {
4412 PyErr_SetString(
4413 PyExc_TypeError,
4414 "* wants int");
4415 goto error;
4416 }
4417 prec = _PyInt_AsInt(v);
4418 if (prec == -1 && PyErr_Occurred())
4419 goto error;
4420 if (prec < 0)
4421 prec = 0;
4422 if (--fmtcnt >= 0)
4423 c = *fmt++;
4424 }
4425 else if (c >= 0 && isdigit(c)) {
4426 prec = c - '0';
4427 while (--fmtcnt >= 0) {
4428 c = Py_CHARMASK(*fmt++);
4429 if (!isdigit(c))
4430 break;
4431 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
4432 PyErr_SetString(
4433 PyExc_ValueError,
4434 "prec too big");
4435 goto error;
4436 }
4437 prec = prec*10 + (c - '0');
4438 }
4439 }
4440 } /* prec */
4441 if (fmtcnt >= 0) {
4442 if (c == 'h' || c == 'l' || c == 'L') {
4443 if (--fmtcnt >= 0)
4444 c = *fmt++;
4445 }
4446 }
4447 if (fmtcnt < 0) {
4448 PyErr_SetString(PyExc_ValueError,
4449 "incomplete format");
4450 goto error;
4451 }
4452 if (c != '%') {
4453 v = getnextarg(args, arglen, &argidx);
4454 if (v == NULL)
4455 goto error;
4456 }
4457 sign = 0;
4458 fill = ' ';
4459 switch (c) {
4460 case '%':
4461 pbuf = "%";
4462 len = 1;
4463 break;
4464 case 's':
4465 #ifdef Py_USING_UNICODE
4466 if (PyUnicode_Check(v)) {
4467 fmt = fmt_start;
4468 argidx = argidx_start;
4469 goto unicode;
4470 }
4471 #endif
4472 temp = _PyObject_Str(v);
4473 #ifdef Py_USING_UNICODE
4474 if (temp != NULL && PyUnicode_Check(temp)) {
4475 Py_DECREF(temp);
4476 fmt = fmt_start;
4477 argidx = argidx_start;
4478 goto unicode;
4479 }
4480 #endif
4481 /* Fall through */
4482 case 'r':
4483 if (c == 'r')
4484 temp = PyObject_Repr(v);
4485 if (temp == NULL)
4486 goto error;
4487 if (!PyString_Check(temp)) {
4488 PyErr_SetString(PyExc_TypeError,
4489 "%s argument has non-string str()");
4490 Py_DECREF(temp);
4491 goto error;
4492 }
4493 pbuf = PyString_AS_STRING(temp);
4494 len = PyString_GET_SIZE(temp);
4495 if (prec >= 0 && len > prec)
4496 len = prec;
4497 break;
4498 case 'i':
4499 case 'd':
4500 case 'u':
4501 case 'o':
4502 case 'x':
4503 case 'X':
4504 if (c == 'i')
4505 c = 'd';
4506 isnumok = 0;
4507 if (PyNumber_Check(v)) {
4508 PyObject *iobj=NULL;
4509
4510 if (PyInt_Check(v) || (PyLong_Check(v))) {
4511 iobj = v;
4512 Py_INCREF(iobj);
4513 }
4514 else {
4515 iobj = PyNumber_Int(v);
4516 if (iobj==NULL) {
4517 PyErr_Clear();
4518 iobj = PyNumber_Long(v);
4519 }
4520 }
4521 if (iobj!=NULL) {
4522 if (PyInt_Check(iobj)) {
4523 isnumok = 1;
4524 pbuf = formatbuf;
4525 len = formatint(pbuf,
4526 sizeof(formatbuf),
4527 flags, prec, c, iobj);
4528 Py_DECREF(iobj);
4529 if (len < 0)
4530 goto error;
4531 sign = 1;
4532 }
4533 else if (PyLong_Check(iobj)) {
4534 int ilen;
4535
4536 isnumok = 1;
4537 temp = _PyString_FormatLong(iobj, flags,
4538 prec, c, &pbuf, &ilen);
4539 Py_DECREF(iobj);
4540 len = ilen;
4541 if (!temp)
4542 goto error;
4543 sign = 1;
4544 }
4545 else {
4546 Py_DECREF(iobj);
4547 }
4548 }
4549 }
4550 if (!isnumok) {
4551 PyErr_Format(PyExc_TypeError,
4552 "%%%c format: a number is required, "
4553 "not %.200s", c, Py_TYPE(v)->tp_name);
4554 goto error;
4555 }
4556 if (flags & F_ZERO)
4557 fill = '0';
4558 break;
4559 case 'e':
4560 case 'E':
4561 case 'f':
4562 case 'F':
4563 case 'g':
4564 case 'G':
4565 temp = formatfloat(v, flags, prec, c);
4566 if (temp == NULL)
4567 goto error;
4568 pbuf = PyString_AS_STRING(temp);
4569 len = PyString_GET_SIZE(temp);
4570 sign = 1;
4571 if (flags & F_ZERO)
4572 fill = '0';
4573 break;
4574 case 'c':
4575 #ifdef Py_USING_UNICODE
4576 if (PyUnicode_Check(v)) {
4577 fmt = fmt_start;
4578 argidx = argidx_start;
4579 goto unicode;
4580 }
4581 #endif
4582 pbuf = formatbuf;
4583 len = formatchar(pbuf, sizeof(formatbuf), v);
4584 if (len < 0)
4585 goto error;
4586 break;
4587 default:
4588 PyErr_Format(PyExc_ValueError,
4589 "unsupported format character '%c' (0x%x) "
4590 "at index %zd",
4591 c, c,
4592 (Py_ssize_t)(fmt - 1 -
4593 PyString_AsString(format)));
4594 goto error;
4595 }
4596 if (sign) {
4597 if (*pbuf == '-' || *pbuf == '+') {
4598 sign = *pbuf++;
4599 len--;
4600 }
4601 else if (flags & F_SIGN)
4602 sign = '+';
4603 else if (flags & F_BLANK)
4604 sign = ' ';
4605 else
4606 sign = 0;
4607 }
4608 if (width < len)
4609 width = len;
4610 if (rescnt - (sign != 0) < width) {
4611 reslen -= rescnt;
4612 rescnt = width + fmtcnt + 100;
4613 reslen += rescnt;
4614 if (reslen < 0) {
4615 Py_DECREF(result);
4616 Py_XDECREF(temp);
4617 return PyErr_NoMemory();
4618 }
4619 if (_PyString_Resize(&result, reslen)) {
4620 Py_XDECREF(temp);
4621 return NULL;
4622 }
4623 res = PyString_AS_STRING(result)
4624 + reslen - rescnt;
4625 }
4626 if (sign) {
4627 if (fill != ' ')
4628 *res++ = sign;
4629 rescnt--;
4630 if (width > len)
4631 width--;
4632 }
4633 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4634 assert(pbuf[0] == '0');
4635 assert(pbuf[1] == c);
4636 if (fill != ' ') {
4637 *res++ = *pbuf++;
4638 *res++ = *pbuf++;
4639 }
4640 rescnt -= 2;
4641 width -= 2;
4642 if (width < 0)
4643 width = 0;
4644 len -= 2;
4645 }
4646 if (width > len && !(flags & F_LJUST)) {
4647 do {
4648 --rescnt;
4649 *res++ = fill;
4650 } while (--width > len);
4651 }
4652 if (fill == ' ') {
4653 if (sign)
4654 *res++ = sign;
4655 if ((flags & F_ALT) &&
4656 (c == 'x' || c == 'X')) {
4657 assert(pbuf[0] == '0');
4658 assert(pbuf[1] == c);
4659 *res++ = *pbuf++;
4660 *res++ = *pbuf++;
4661 }
4662 }
4663 Py_MEMCPY(res, pbuf, len);
4664 res += len;
4665 rescnt -= len;
4666 while (--width >= len) {
4667 --rescnt;
4668 *res++ = ' ';
4669 }
4670 if (dict && (argidx < arglen) && c != '%') {
4671 PyErr_SetString(PyExc_TypeError,
4672 "not all arguments converted during string formatting");
4673 Py_XDECREF(temp);
4674 goto error;
4675 }
4676 Py_XDECREF(temp);
4677 } /* '%' */
4678 } /* until end */
4679 if (argidx < arglen && !dict) {
4680 PyErr_SetString(PyExc_TypeError,
4681 "not all arguments converted during string formatting");
4682 goto error;
4683 }
4684 if (args_owned) {
4685 Py_DECREF(args);
4686 }
4687 if (_PyString_Resize(&result, reslen - rescnt))
4688 return NULL;
4689 return result;
4690
4691 #ifdef Py_USING_UNICODE
4692 unicode:
4693 if (args_owned) {
4694 Py_DECREF(args);
4695 args_owned = 0;
4696 }
4697 /* Fiddle args right (remove the first argidx arguments) */
4698 if (PyTuple_Check(orig_args) && argidx > 0) {
4699 PyObject *v;
4700 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4701 v = PyTuple_New(n);
4702 if (v == NULL)
4703 goto error;
4704 while (--n >= 0) {
4705 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4706 Py_INCREF(w);
4707 PyTuple_SET_ITEM(v, n, w);
4708 }
4709 args = v;
4710 } else {
4711 Py_INCREF(orig_args);
4712 args = orig_args;
4713 }
4714 args_owned = 1;
4715 /* Take what we have of the result and let the Unicode formatting
4716 function format the rest of the input. */
4717 rescnt = res - PyString_AS_STRING(result);
4718 if (_PyString_Resize(&result, rescnt))
4719 goto error;
4720 fmtcnt = PyString_GET_SIZE(format) - \
4721 (fmt - PyString_AS_STRING(format));
4722 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4723 if (format == NULL)
4724 goto error;
4725 v = PyUnicode_Format(format, args);
4726 Py_DECREF(format);
4727 if (v == NULL)
4728 goto error;
4729 /* Paste what we have (result) to what the Unicode formatting
4730 function returned (v) and return the result (or error) */
4731 w = PyUnicode_Concat(result, v);
4732 Py_DECREF(result);
4733 Py_DECREF(v);
4734 Py_DECREF(args);
4735 return w;
4736 #endif /* Py_USING_UNICODE */
4737
4738 error:
4739 Py_DECREF(result);
4740 if (args_owned) {
4741 Py_DECREF(args);
4742 }
4743 return NULL;
4744 }
4745
4746 void
PyString_InternInPlace(PyObject ** p)4747 PyString_InternInPlace(PyObject **p)
4748 {
4749 register PyStringObject *s = (PyStringObject *)(*p);
4750 PyObject *t;
4751 if (s == NULL || !PyString_Check(s))
4752 Py_FatalError("PyString_InternInPlace: strings only please!");
4753 /* If it's a string subclass, we don't really know what putting
4754 it in the interned dict might do. */
4755 if (!PyString_CheckExact(s))
4756 return;
4757 if (PyString_CHECK_INTERNED(s))
4758 return;
4759 if (interned == NULL) {
4760 interned = PyDict_New();
4761 if (interned == NULL) {
4762 PyErr_Clear(); /* Don't leave an exception */
4763 return;
4764 }
4765 }
4766 t = PyDict_GetItem(interned, (PyObject *)s);
4767 if (t) {
4768 Py_INCREF(t);
4769 Py_SETREF(*p, t);
4770 return;
4771 }
4772
4773 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4774 PyErr_Clear();
4775 return;
4776 }
4777 /* The two references in interned are not counted by refcnt.
4778 The string deallocator will take care of this */
4779 Py_REFCNT(s) -= 2;
4780 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
4781 }
4782
4783 void
PyString_InternImmortal(PyObject ** p)4784 PyString_InternImmortal(PyObject **p)
4785 {
4786 PyString_InternInPlace(p);
4787 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4788 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4789 Py_INCREF(*p);
4790 }
4791 }
4792
4793
4794 PyObject *
PyString_InternFromString(const char * cp)4795 PyString_InternFromString(const char *cp)
4796 {
4797 PyObject *s = PyString_FromString(cp);
4798 if (s == NULL)
4799 return NULL;
4800 PyString_InternInPlace(&s);
4801 return s;
4802 }
4803
4804 void
PyString_Fini(void)4805 PyString_Fini(void)
4806 {
4807 int i;
4808 for (i = 0; i < UCHAR_MAX + 1; i++)
4809 Py_CLEAR(characters[i]);
4810 Py_CLEAR(nullstring);
4811 }
4812
_Py_ReleaseInternedStrings(void)4813 void _Py_ReleaseInternedStrings(void)
4814 {
4815 PyObject *keys;
4816 PyStringObject *s;
4817 Py_ssize_t i, n;
4818 Py_ssize_t immortal_size = 0, mortal_size = 0;
4819
4820 if (interned == NULL || !PyDict_Check(interned))
4821 return;
4822 keys = PyDict_Keys(interned);
4823 if (keys == NULL || !PyList_Check(keys)) {
4824 PyErr_Clear();
4825 return;
4826 }
4827
4828 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4829 detector, interned strings are not forcibly deallocated; rather, we
4830 give them their stolen references back, and then clear and DECREF
4831 the interned dict. */
4832
4833 n = PyList_GET_SIZE(keys);
4834 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4835 n);
4836 for (i = 0; i < n; i++) {
4837 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4838 switch (s->ob_sstate) {
4839 case SSTATE_NOT_INTERNED:
4840 /* XXX Shouldn't happen */
4841 break;
4842 case SSTATE_INTERNED_IMMORTAL:
4843 Py_REFCNT(s) += 1;
4844 immortal_size += Py_SIZE(s);
4845 break;
4846 case SSTATE_INTERNED_MORTAL:
4847 Py_REFCNT(s) += 2;
4848 mortal_size += Py_SIZE(s);
4849 break;
4850 default:
4851 Py_FatalError("Inconsistent interned string state.");
4852 }
4853 s->ob_sstate = SSTATE_NOT_INTERNED;
4854 }
4855 fprintf(stderr, "total size of all interned strings: "
4856 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4857 "mortal/immortal\n", mortal_size, immortal_size);
4858 Py_DECREF(keys);
4859 PyDict_Clear(interned);
4860 Py_CLEAR(interned);
4861 }
4862