• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* String (str/bytes) object implementation */
2 
3 #define PY_SSIZE_T_CLEAN
4 
5 #include "Python.h"
6 #include <ctype.h>
7 #include <stddef.h>
8 
9 #ifdef COUNT_ALLOCS
10 Py_ssize_t null_strings, one_strings;
11 #endif
12 
13 static PyStringObject *characters[UCHAR_MAX + 1];
14 static PyStringObject *nullstring;
15 
16 /* This dictionary holds all interned strings.  Note that references to
17    strings in this dictionary are *not* counted in the string's ob_refcnt.
18    When the interned string reaches a refcnt of 0 the string deallocation
19    function will delete the reference from this dictionary.
20 
21    Another way to look at this is to say that the actual reference
22    count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
23 */
24 static PyObject *interned;
25 
26 /* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27    for a string of length n should request PyStringObject_SIZE + n bytes.
28 
29    Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30    3 bytes per string allocation on a typical system.
31 */
32 #define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33 
34 /*
35    For PyString_FromString(), the parameter `str' points to a null-terminated
36    string containing exactly `size' bytes.
37 
38    For PyString_FromStringAndSize(), the parameter `str' is
39    either NULL or else points to a string containing at least `size' bytes.
40    For PyString_FromStringAndSize(), the string in the `str' parameter does
41    not have to be null-terminated.  (Therefore it is safe to construct a
42    substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
43    If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
44    bytes (setting the last byte to the null terminating character) and you can
45    fill in the data yourself.  If `str' is non-NULL then the resulting
46    PyString object must be treated as immutable and you must not fill in nor
47    alter the data yourself, since the strings may be shared.
48 
49    The PyObject member `op->ob_size', which denotes the number of "extra
50    items" in a variable-size object, will contain the number of bytes
51    allocated for string data, not counting the null terminating character.
52    It is therefore equal to the `size' parameter (for
53    PyString_FromStringAndSize()) or the length of the string in the `str'
54    parameter (for PyString_FromString()).
55 */
56 PyObject *
PyString_FromStringAndSize(const char * str,Py_ssize_t size)57 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
58 {
59     register PyStringObject *op;
60     if (size < 0) {
61         PyErr_SetString(PyExc_SystemError,
62             "Negative size passed to PyString_FromStringAndSize");
63         return NULL;
64     }
65     if (size == 0 && (op = nullstring) != NULL) {
66 #ifdef COUNT_ALLOCS
67         null_strings++;
68 #endif
69         Py_INCREF(op);
70         return (PyObject *)op;
71     }
72     if (size == 1 && str != NULL &&
73         (op = characters[*str & UCHAR_MAX]) != NULL)
74     {
75 #ifdef COUNT_ALLOCS
76         one_strings++;
77 #endif
78         Py_INCREF(op);
79         return (PyObject *)op;
80     }
81 
82     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
83         PyErr_SetString(PyExc_OverflowError, "string is too large");
84         return NULL;
85     }
86 
87     /* Inline PyObject_NewVar */
88     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
89     if (op == NULL)
90         return PyErr_NoMemory();
91     (void)PyObject_INIT_VAR(op, &PyString_Type, size);
92     op->ob_shash = -1;
93     op->ob_sstate = SSTATE_NOT_INTERNED;
94     if (str != NULL)
95         Py_MEMCPY(op->ob_sval, str, size);
96     op->ob_sval[size] = '\0';
97     /* share short strings */
98     if (size == 0) {
99         PyObject *t = (PyObject *)op;
100         PyString_InternInPlace(&t);
101         op = (PyStringObject *)t;
102         nullstring = op;
103         Py_INCREF(op);
104     } else if (size == 1 && str != NULL) {
105         PyObject *t = (PyObject *)op;
106         PyString_InternInPlace(&t);
107         op = (PyStringObject *)t;
108         characters[*str & UCHAR_MAX] = op;
109         Py_INCREF(op);
110     }
111     return (PyObject *) op;
112 }
113 
114 PyObject *
PyString_FromString(const char * str)115 PyString_FromString(const char *str)
116 {
117     register size_t size;
118     register PyStringObject *op;
119 
120     assert(str != NULL);
121     size = strlen(str);
122     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
123         PyErr_SetString(PyExc_OverflowError,
124             "string is too long for a Python string");
125         return NULL;
126     }
127     if (size == 0 && (op = nullstring) != NULL) {
128 #ifdef COUNT_ALLOCS
129         null_strings++;
130 #endif
131         Py_INCREF(op);
132         return (PyObject *)op;
133     }
134     if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
135 #ifdef COUNT_ALLOCS
136         one_strings++;
137 #endif
138         Py_INCREF(op);
139         return (PyObject *)op;
140     }
141 
142     /* Inline PyObject_NewVar */
143     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
144     if (op == NULL)
145         return PyErr_NoMemory();
146     (void)PyObject_INIT_VAR(op, &PyString_Type, size);
147     op->ob_shash = -1;
148     op->ob_sstate = SSTATE_NOT_INTERNED;
149     Py_MEMCPY(op->ob_sval, str, size+1);
150     /* share short strings */
151     if (size == 0) {
152         PyObject *t = (PyObject *)op;
153         PyString_InternInPlace(&t);
154         op = (PyStringObject *)t;
155         nullstring = op;
156         Py_INCREF(op);
157     } else if (size == 1) {
158         PyObject *t = (PyObject *)op;
159         PyString_InternInPlace(&t);
160         op = (PyStringObject *)t;
161         characters[*str & UCHAR_MAX] = op;
162         Py_INCREF(op);
163     }
164     return (PyObject *) op;
165 }
166 
167 PyObject *
PyString_FromFormatV(const char * format,va_list vargs)168 PyString_FromFormatV(const char *format, va_list vargs)
169 {
170     va_list count;
171     Py_ssize_t n = 0;
172     const char* f;
173     char *s;
174     PyObject* string;
175 
176 #ifdef VA_LIST_IS_ARRAY
177     Py_MEMCPY(count, vargs, sizeof(va_list));
178 #else
179 #ifdef  __va_copy
180     __va_copy(count, vargs);
181 #else
182     count = vargs;
183 #endif
184 #endif
185     /* step 1: figure out how large a buffer we need */
186     for (f = format; *f; f++) {
187         if (*f == '%') {
188 #ifdef HAVE_LONG_LONG
189             int longlongflag = 0;
190 #endif
191             const char* p = f;
192             while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
193                 ;
194 
195             /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
196              * they don't affect the amount of space we reserve.
197              */
198             if (*f == 'l') {
199                 if (f[1] == 'd' || f[1] == 'u') {
200                     ++f;
201                 }
202 #ifdef HAVE_LONG_LONG
203                 else if (f[1] == 'l' &&
204                          (f[2] == 'd' || f[2] == 'u')) {
205                     longlongflag = 1;
206                     f += 2;
207                 }
208 #endif
209             }
210             else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
211                 ++f;
212             }
213 
214             switch (*f) {
215             case 'c':
216                 (void)va_arg(count, int);
217                 /* fall through... */
218             case '%':
219                 n++;
220                 break;
221             case 'd': case 'u': case 'i': case 'x':
222                 (void) va_arg(count, int);
223 #ifdef HAVE_LONG_LONG
224                 /* Need at most
225                    ceil(log10(256)*SIZEOF_LONG_LONG) digits,
226                    plus 1 for the sign.  53/22 is an upper
227                    bound for log10(256). */
228                 if (longlongflag)
229                     n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
230                 else
231 #endif
232                     /* 20 bytes is enough to hold a 64-bit
233                        integer.  Decimal takes the most
234                        space.  This isn't enough for
235                        octal. */
236                     n += 20;
237 
238                 break;
239             case 's':
240                 s = va_arg(count, char*);
241                 n += strlen(s);
242                 break;
243             case 'p':
244                 (void) va_arg(count, int);
245                 /* maximum 64-bit pointer representation:
246                  * 0xffffffffffffffff
247                  * so 19 characters is enough.
248                  * XXX I count 18 -- what's the extra for?
249                  */
250                 n += 19;
251                 break;
252             default:
253                 /* if we stumble upon an unknown
254                    formatting code, copy the rest of
255                    the format string to the output
256                    string. (we cannot just skip the
257                    code, since there's no way to know
258                    what's in the argument list) */
259                 n += strlen(p);
260                 goto expand;
261             }
262         } else
263             n++;
264     }
265  expand:
266     /* step 2: fill the buffer */
267     /* Since we've analyzed how much space we need for the worst case,
268        use sprintf directly instead of the slower PyOS_snprintf. */
269     string = PyString_FromStringAndSize(NULL, n);
270     if (!string)
271         return NULL;
272 
273     s = PyString_AsString(string);
274 
275     for (f = format; *f; f++) {
276         if (*f == '%') {
277             const char* p = f++;
278             Py_ssize_t i;
279             int longflag = 0;
280 #ifdef HAVE_LONG_LONG
281             int longlongflag = 0;
282 #endif
283             int size_tflag = 0;
284             /* parse the width.precision part (we're only
285                interested in the precision value, if any) */
286             n = 0;
287             while (isdigit(Py_CHARMASK(*f)))
288                 n = (n*10) + *f++ - '0';
289             if (*f == '.') {
290                 f++;
291                 n = 0;
292                 while (isdigit(Py_CHARMASK(*f)))
293                     n = (n*10) + *f++ - '0';
294             }
295             while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
296                 f++;
297             /* Handle %ld, %lu, %lld and %llu. */
298             if (*f == 'l') {
299                 if (f[1] == 'd' || f[1] == 'u') {
300                     longflag = 1;
301                     ++f;
302                 }
303 #ifdef HAVE_LONG_LONG
304                 else if (f[1] == 'l' &&
305                          (f[2] == 'd' || f[2] == 'u')) {
306                     longlongflag = 1;
307                     f += 2;
308                 }
309 #endif
310             }
311             /* handle the size_t flag. */
312             else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
313                 size_tflag = 1;
314                 ++f;
315             }
316 
317             switch (*f) {
318             case 'c':
319                 *s++ = va_arg(vargs, int);
320                 break;
321             case 'd':
322                 if (longflag)
323                     sprintf(s, "%ld", va_arg(vargs, long));
324 #ifdef HAVE_LONG_LONG
325                 else if (longlongflag)
326                     sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
327                         va_arg(vargs, PY_LONG_LONG));
328 #endif
329                 else if (size_tflag)
330                     sprintf(s, "%" PY_FORMAT_SIZE_T "d",
331                         va_arg(vargs, Py_ssize_t));
332                 else
333                     sprintf(s, "%d", va_arg(vargs, int));
334                 s += strlen(s);
335                 break;
336             case 'u':
337                 if (longflag)
338                     sprintf(s, "%lu",
339                         va_arg(vargs, unsigned long));
340 #ifdef HAVE_LONG_LONG
341                 else if (longlongflag)
342                     sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
343                         va_arg(vargs, PY_LONG_LONG));
344 #endif
345                 else if (size_tflag)
346                     sprintf(s, "%" PY_FORMAT_SIZE_T "u",
347                         va_arg(vargs, size_t));
348                 else
349                     sprintf(s, "%u",
350                         va_arg(vargs, unsigned int));
351                 s += strlen(s);
352                 break;
353             case 'i':
354                 sprintf(s, "%i", va_arg(vargs, int));
355                 s += strlen(s);
356                 break;
357             case 'x':
358                 sprintf(s, "%x", va_arg(vargs, int));
359                 s += strlen(s);
360                 break;
361             case 's':
362                 p = va_arg(vargs, char*);
363                 i = strlen(p);
364                 if (n > 0 && i > n)
365                     i = n;
366                 Py_MEMCPY(s, p, i);
367                 s += i;
368                 break;
369             case 'p':
370                 sprintf(s, "%p", va_arg(vargs, void*));
371                 /* %p is ill-defined:  ensure leading 0x. */
372                 if (s[1] == 'X')
373                     s[1] = 'x';
374                 else if (s[1] != 'x') {
375                     memmove(s+2, s, strlen(s)+1);
376                     s[0] = '0';
377                     s[1] = 'x';
378                 }
379                 s += strlen(s);
380                 break;
381             case '%':
382                 *s++ = '%';
383                 break;
384             default:
385                 strcpy(s, p);
386                 s += strlen(s);
387                 goto end;
388             }
389         } else
390             *s++ = *f;
391     }
392 
393  end:
394     if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
395         return NULL;
396     return string;
397 }
398 
399 PyObject *
PyString_FromFormat(const char * format,...)400 PyString_FromFormat(const char *format, ...)
401 {
402     PyObject* ret;
403     va_list vargs;
404 
405 #ifdef HAVE_STDARG_PROTOTYPES
406     va_start(vargs, format);
407 #else
408     va_start(vargs);
409 #endif
410     ret = PyString_FromFormatV(format, vargs);
411     va_end(vargs);
412     return ret;
413 }
414 
415 
PyString_Decode(const char * s,Py_ssize_t size,const char * encoding,const char * errors)416 PyObject *PyString_Decode(const char *s,
417                           Py_ssize_t size,
418                           const char *encoding,
419                           const char *errors)
420 {
421     PyObject *v, *str;
422 
423     str = PyString_FromStringAndSize(s, size);
424     if (str == NULL)
425         return NULL;
426     v = PyString_AsDecodedString(str, encoding, errors);
427     Py_DECREF(str);
428     return v;
429 }
430 
/* Decode the string object `str' through _PyCodec_DecodeText() and return
   its result unchanged.  A NULL `encoding' selects the default encoding
   when Unicode support is compiled in and is an error otherwise.
   Returns NULL with an exception set on failure. */
PyObject *PyString_AsDecodedObject(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    if (!PyString_Check(str)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (encoding == NULL) {
#ifdef Py_USING_UNICODE
        encoding = PyUnicode_GetDefaultEncoding();
#else
        PyErr_SetString(PyExc_ValueError, "no encoding specified");
        return NULL;
#endif
    }

    /* Decode via the codec registry; NULL is passed straight through. */
    return _PyCodec_DecodeText(str, encoding, errors);
}
461 
/* Like PyString_AsDecodedObject(), but the result is required to be a
   string object: a Unicode result is first encoded with the default
   encoding, and any other non-string result raises TypeError.
   Returns NULL with an exception set on failure. */
PyObject *PyString_AsDecodedString(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    PyObject *decoded = PyString_AsDecodedObject(str, encoding, errors);

    if (decoded == NULL)
        return NULL;

#ifdef Py_USING_UNICODE
    /* Convert Unicode to a string using the default encoding */
    if (PyUnicode_Check(decoded)) {
        PyObject *as_unicode = decoded;
        decoded = PyUnicode_AsEncodedString(as_unicode, NULL, NULL);
        Py_DECREF(as_unicode);
        if (decoded == NULL)
            return NULL;
    }
#endif
    if (PyString_Check(decoded))
        return decoded;

    PyErr_Format(PyExc_TypeError,
                 "decoder did not return a string object (type=%.400s)",
                 Py_TYPE(decoded)->tp_name);
    Py_DECREF(decoded);
    return NULL;
}
495 
PyString_Encode(const char * s,Py_ssize_t size,const char * encoding,const char * errors)496 PyObject *PyString_Encode(const char *s,
497                           Py_ssize_t size,
498                           const char *encoding,
499                           const char *errors)
500 {
501     PyObject *v, *str;
502 
503     str = PyString_FromStringAndSize(s, size);
504     if (str == NULL)
505         return NULL;
506     v = PyString_AsEncodedString(str, encoding, errors);
507     Py_DECREF(str);
508     return v;
509 }
510 
/* Encode the string object `str' through _PyCodec_EncodeText() and return
   its result unchanged.  A NULL `encoding' selects the default encoding
   when Unicode support is compiled in and is an error otherwise.
   Returns NULL with an exception set on failure. */
PyObject *PyString_AsEncodedObject(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    if (!PyString_Check(str)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (encoding == NULL) {
#ifdef Py_USING_UNICODE
        encoding = PyUnicode_GetDefaultEncoding();
#else
        PyErr_SetString(PyExc_ValueError, "no encoding specified");
        return NULL;
#endif
    }

    /* Encode via the codec registry; NULL is passed straight through. */
    return _PyCodec_EncodeText(str, encoding, errors);
}
541 
/* Like PyString_AsEncodedObject(), but the result is required to be a
   string object: a Unicode result is first encoded with the default
   encoding, and any other non-string result raises TypeError.
   Returns NULL with an exception set on failure. */
PyObject *PyString_AsEncodedString(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    PyObject *encoded = PyString_AsEncodedObject(str, encoding, errors);

    if (encoded == NULL)
        return NULL;

#ifdef Py_USING_UNICODE
    /* Convert Unicode to a string using the default encoding */
    if (PyUnicode_Check(encoded)) {
        PyObject *as_unicode = encoded;
        encoded = PyUnicode_AsEncodedString(as_unicode, NULL, NULL);
        Py_DECREF(as_unicode);
        if (encoded == NULL)
            return NULL;
    }
#endif
    if (PyString_Check(encoded))
        return encoded;

    PyErr_Format(PyExc_TypeError,
                 "encoder did not return a string object (type=%.400s)",
                 Py_TYPE(encoded)->tp_name);
    Py_DECREF(encoded);
    return NULL;
}
575 
576 static void
string_dealloc(PyObject * op)577 string_dealloc(PyObject *op)
578 {
579     switch (PyString_CHECK_INTERNED(op)) {
580         case SSTATE_NOT_INTERNED:
581             break;
582 
583         case SSTATE_INTERNED_MORTAL:
584             /* revive dead object temporarily for DelItem */
585             Py_REFCNT(op) = 3;
586             if (PyDict_DelItem(interned, op) != 0)
587                 Py_FatalError(
588                     "deletion of interned string failed");
589             break;
590 
591         case SSTATE_INTERNED_IMMORTAL:
592             Py_FatalError("Immortal interned string died.");
593 
594         default:
595             Py_FatalError("Inconsistent interned string state.");
596     }
597     Py_TYPE(op)->tp_free(op);
598 }
599 
600 /* Unescape a backslash-escaped string. If unicode is non-zero,
601    the string is a u-literal. If recode_encoding is non-zero,
602    the string is UTF-8 encoded and should be re-encoded in the
603    specified encoding.  */
604 
PyString_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,Py_ssize_t unicode,const char * recode_encoding)605 PyObject *PyString_DecodeEscape(const char *s,
606                                 Py_ssize_t len,
607                                 const char *errors,
608                                 Py_ssize_t unicode,
609                                 const char *recode_encoding)
610 {
611     int c;
612     char *p, *buf;
613     const char *end;
614     PyObject *v;
615     Py_ssize_t newlen = recode_encoding ? 4*len:len;
616     v = PyString_FromStringAndSize((char *)NULL, newlen);
617     if (v == NULL)
618         return NULL;
619     p = buf = PyString_AsString(v);
620     end = s + len;
621     while (s < end) {
622         if (*s != '\\') {
623           non_esc:
624 #ifdef Py_USING_UNICODE
625             if (recode_encoding && (*s & 0x80)) {
626                 PyObject *u, *w;
627                 char *r;
628                 const char* t;
629                 Py_ssize_t rn;
630                 t = s;
631                 /* Decode non-ASCII bytes as UTF-8. */
632                 while (t < end && (*t & 0x80)) t++;
633                 u = PyUnicode_DecodeUTF8(s, t - s, errors);
634                 if(!u) goto failed;
635 
636                 /* Recode them in target encoding. */
637                 w = PyUnicode_AsEncodedString(
638                     u, recode_encoding, errors);
639                 Py_DECREF(u);
640                 if (!w)                 goto failed;
641 
642                 /* Append bytes to output buffer. */
643                 assert(PyString_Check(w));
644                 r = PyString_AS_STRING(w);
645                 rn = PyString_GET_SIZE(w);
646                 Py_MEMCPY(p, r, rn);
647                 p += rn;
648                 Py_DECREF(w);
649                 s = t;
650             } else {
651                 *p++ = *s++;
652             }
653 #else
654             *p++ = *s++;
655 #endif
656             continue;
657         }
658         s++;
659         if (s==end) {
660             PyErr_SetString(PyExc_ValueError,
661                             "Trailing \\ in string");
662             goto failed;
663         }
664         switch (*s++) {
665         /* XXX This assumes ASCII! */
666         case '\n': break;
667         case '\\': *p++ = '\\'; break;
668         case '\'': *p++ = '\''; break;
669         case '\"': *p++ = '\"'; break;
670         case 'b': *p++ = '\b'; break;
671         case 'f': *p++ = '\014'; break; /* FF */
672         case 't': *p++ = '\t'; break;
673         case 'n': *p++ = '\n'; break;
674         case 'r': *p++ = '\r'; break;
675         case 'v': *p++ = '\013'; break; /* VT */
676         case 'a': *p++ = '\007'; break; /* BEL, not classic C */
677         case '0': case '1': case '2': case '3':
678         case '4': case '5': case '6': case '7':
679             c = s[-1] - '0';
680             if (s < end && '0' <= *s && *s <= '7') {
681                 c = (c<<3) + *s++ - '0';
682                 if (s < end && '0' <= *s && *s <= '7')
683                     c = (c<<3) + *s++ - '0';
684             }
685             *p++ = c;
686             break;
687         case 'x':
688             if (s+1 < end &&
689                 isxdigit(Py_CHARMASK(s[0])) &&
690                 isxdigit(Py_CHARMASK(s[1])))
691             {
692                 unsigned int x = 0;
693                 c = Py_CHARMASK(*s);
694                 s++;
695                 if (isdigit(c))
696                     x = c - '0';
697                 else if (islower(c))
698                     x = 10 + c - 'a';
699                 else
700                     x = 10 + c - 'A';
701                 x = x << 4;
702                 c = Py_CHARMASK(*s);
703                 s++;
704                 if (isdigit(c))
705                     x += c - '0';
706                 else if (islower(c))
707                     x += 10 + c - 'a';
708                 else
709                     x += 10 + c - 'A';
710                 *p++ = x;
711                 break;
712             }
713             if (!errors || strcmp(errors, "strict") == 0) {
714                 PyErr_SetString(PyExc_ValueError,
715                                 "invalid \\x escape");
716                 goto failed;
717             }
718             if (strcmp(errors, "replace") == 0) {
719                 *p++ = '?';
720             } else if (strcmp(errors, "ignore") == 0)
721                 /* do nothing */;
722             else {
723                 PyErr_Format(PyExc_ValueError,
724                              "decoding error; "
725                              "unknown error handling code: %.400s",
726                              errors);
727                 goto failed;
728             }
729             /* skip \x */
730             if (s < end && isxdigit(Py_CHARMASK(s[0])))
731                 s++; /* and a hexdigit */
732             break;
733 #ifndef Py_USING_UNICODE
734         case 'u':
735         case 'U':
736         case 'N':
737             if (unicode) {
738                 PyErr_SetString(PyExc_ValueError,
739                           "Unicode escapes not legal "
740                           "when Unicode disabled");
741                 goto failed;
742             }
743 #endif
744         default:
745             *p++ = '\\';
746             s--;
747             goto non_esc; /* an arbitrary number of unescaped
748                              UTF-8 bytes may follow. */
749         }
750     }
751     if (p-buf < newlen)
752         _PyString_Resize(&v, p - buf); /* v is cleared on error */
753     return v;
754   failed:
755     Py_DECREF(v);
756     return NULL;
757 }
758 
759 /* -------------------------------------------------------------------- */
760 /* object api */
761 
762 static Py_ssize_t
string_getsize(register PyObject * op)763 string_getsize(register PyObject *op)
764 {
765     char *s;
766     Py_ssize_t len;
767     if (PyString_AsStringAndSize(op, &s, &len))
768         return -1;
769     return len;
770 }
771 
772 static /*const*/ char *
string_getbuffer(register PyObject * op)773 string_getbuffer(register PyObject *op)
774 {
775     char *s;
776     Py_ssize_t len;
777     if (PyString_AsStringAndSize(op, &s, &len))
778         return NULL;
779     return s;
780 }
781 
782 Py_ssize_t
PyString_Size(register PyObject * op)783 PyString_Size(register PyObject *op)
784 {
785     if (!PyString_Check(op))
786         return string_getsize(op);
787     return Py_SIZE(op);
788 }
789 
790 /*const*/ char *
PyString_AsString(register PyObject * op)791 PyString_AsString(register PyObject *op)
792 {
793     if (!PyString_Check(op))
794         return string_getbuffer(op);
795     return ((PyStringObject *)op) -> ob_sval;
796 }
797 
798 int
PyString_AsStringAndSize(register PyObject * obj,register char ** s,register Py_ssize_t * len)799 PyString_AsStringAndSize(register PyObject *obj,
800                          register char **s,
801                          register Py_ssize_t *len)
802 {
803     if (s == NULL) {
804         PyErr_BadInternalCall();
805         return -1;
806     }
807 
808     if (!PyString_Check(obj)) {
809 #ifdef Py_USING_UNICODE
810         if (PyUnicode_Check(obj)) {
811             obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
812             if (obj == NULL)
813                 return -1;
814         }
815         else
816 #endif
817         {
818             PyErr_Format(PyExc_TypeError,
819                          "expected string or Unicode object, "
820                          "%.200s found", Py_TYPE(obj)->tp_name);
821             return -1;
822         }
823     }
824 
825     *s = PyString_AS_STRING(obj);
826     if (len != NULL)
827         *len = PyString_GET_SIZE(obj);
828     else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
829         PyErr_SetString(PyExc_TypeError,
830                         "expected string without null bytes");
831         return -1;
832     }
833     return 0;
834 }
835 
836 /* -------------------------------------------------------------------- */
837 /* Methods */
838 
839 #include "stringlib/stringdefs.h"
840 #include "stringlib/fastsearch.h"
841 
842 #include "stringlib/count.h"
843 #include "stringlib/find.h"
844 #include "stringlib/partition.h"
845 #include "stringlib/split.h"
846 
847 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
848 #include "stringlib/localeutil.h"
849 
850 
851 
852 static int
string_print(PyStringObject * op,FILE * fp,int flags)853 string_print(PyStringObject *op, FILE *fp, int flags)
854 {
855     Py_ssize_t i, str_len;
856     char c;
857     int quote;
858 
859     /* XXX Ought to check for interrupts when writing long strings */
860     if (! PyString_CheckExact(op)) {
861         int ret;
862         /* A str subclass may have its own __str__ method. */
863         op = (PyStringObject *) PyObject_Str((PyObject *)op);
864         if (op == NULL)
865             return -1;
866         ret = string_print(op, fp, flags);
867         Py_DECREF(op);
868         return ret;
869     }
870     if (flags & Py_PRINT_RAW) {
871         char *data = op->ob_sval;
872         Py_ssize_t size = Py_SIZE(op);
873         Py_BEGIN_ALLOW_THREADS
874         while (size > INT_MAX) {
875             /* Very long strings cannot be written atomically.
876              * But don't write exactly INT_MAX bytes at a time
877              * to avoid memory aligment issues.
878              */
879             const int chunk_size = INT_MAX & ~0x3FFF;
880             fwrite(data, 1, chunk_size, fp);
881             data += chunk_size;
882             size -= chunk_size;
883         }
884 #ifdef __VMS
885         if (size) fwrite(data, (size_t)size, 1, fp);
886 #else
887         fwrite(data, 1, (size_t)size, fp);
888 #endif
889         Py_END_ALLOW_THREADS
890         return 0;
891     }
892 
893     /* figure out which quote to use; single is preferred */
894     quote = '\'';
895     if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
896         !memchr(op->ob_sval, '"', Py_SIZE(op)))
897         quote = '"';
898 
899     str_len = Py_SIZE(op);
900     Py_BEGIN_ALLOW_THREADS
901     fputc(quote, fp);
902     for (i = 0; i < str_len; i++) {
903         /* Since strings are immutable and the caller should have a
904         reference, accessing the internal buffer should not be an issue
905         with the GIL released. */
906         c = op->ob_sval[i];
907         if (c == quote || c == '\\')
908             fprintf(fp, "\\%c", c);
909         else if (c == '\t')
910             fprintf(fp, "\\t");
911         else if (c == '\n')
912             fprintf(fp, "\\n");
913         else if (c == '\r')
914             fprintf(fp, "\\r");
915         else if (c < ' ' || c >= 0x7f)
916             fprintf(fp, "\\x%02x", c & 0xff);
917         else
918             fputc(c, fp);
919     }
920     fputc(quote, fp);
921     Py_END_ALLOW_THREADS
922     return 0;
923 }
924 
925 PyObject *
PyString_Repr(PyObject * obj,int smartquotes)926 PyString_Repr(PyObject *obj, int smartquotes)
927 {
928     register PyStringObject* op = (PyStringObject*) obj;
929     size_t newsize;
930     PyObject *v;
931     if (Py_SIZE(op) > (PY_SSIZE_T_MAX - 2)/4) {
932         PyErr_SetString(PyExc_OverflowError,
933             "string is too large to make repr");
934         return NULL;
935     }
936     newsize = 2 + 4*Py_SIZE(op);
937     v = PyString_FromStringAndSize((char *)NULL, newsize);
938     if (v == NULL) {
939         return NULL;
940     }
941     else {
942         register Py_ssize_t i;
943         register char c;
944         register char *p;
945         int quote;
946 
947         /* figure out which quote to use; single is preferred */
948         quote = '\'';
949         if (smartquotes &&
950             memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
951             !memchr(op->ob_sval, '"', Py_SIZE(op)))
952             quote = '"';
953 
954         p = PyString_AS_STRING(v);
955         *p++ = quote;
956         for (i = 0; i < Py_SIZE(op); i++) {
957             /* There's at least enough room for a hex escape
958                and a closing quote. */
959             assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
960             c = op->ob_sval[i];
961             if (c == quote || c == '\\')
962                 *p++ = '\\', *p++ = c;
963             else if (c == '\t')
964                 *p++ = '\\', *p++ = 't';
965             else if (c == '\n')
966                 *p++ = '\\', *p++ = 'n';
967             else if (c == '\r')
968                 *p++ = '\\', *p++ = 'r';
969             else if (c < ' ' || c >= 0x7f) {
970                 /* For performance, we don't want to call
971                    PyOS_snprintf here (extra layers of
972                    function call). */
973                 sprintf(p, "\\x%02x", c & 0xff);
974                 p += 4;
975             }
976             else
977                 *p++ = c;
978         }
979         assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
980         *p++ = quote;
981         *p = '\0';
982         if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
983             return NULL;
984         return v;
985     }
986 }
987 
988 static PyObject *
string_repr(PyObject * op)989 string_repr(PyObject *op)
990 {
991     return PyString_Repr(op, 1);
992 }
993 
994 static PyObject *
string_str(PyObject * s)995 string_str(PyObject *s)
996 {
997     assert(PyString_Check(s));
998     if (PyString_CheckExact(s)) {
999         Py_INCREF(s);
1000         return s;
1001     }
1002     else {
1003         /* Subtype -- return genuine string with the same value. */
1004         PyStringObject *t = (PyStringObject *) s;
1005         return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1006     }
1007 }
1008 
1009 static Py_ssize_t
string_length(PyStringObject * a)1010 string_length(PyStringObject *a)
1011 {
1012     return Py_SIZE(a);
1013 }
1014 
1015 static PyObject *
string_concat(register PyStringObject * a,register PyObject * bb)1016 string_concat(register PyStringObject *a, register PyObject *bb)
1017 {
1018     register Py_ssize_t size;
1019     register PyStringObject *op;
1020     if (!PyString_Check(bb)) {
1021 #ifdef Py_USING_UNICODE
1022         if (PyUnicode_Check(bb))
1023             return PyUnicode_Concat((PyObject *)a, bb);
1024 #endif
1025         if (PyByteArray_Check(bb))
1026             return PyByteArray_Concat((PyObject *)a, bb);
1027         PyErr_Format(PyExc_TypeError,
1028                      "cannot concatenate 'str' and '%.200s' objects",
1029                      Py_TYPE(bb)->tp_name);
1030         return NULL;
1031     }
1032 #define b ((PyStringObject *)bb)
1033     /* Optimize cases with empty left or right operand */
1034     if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1035         PyString_CheckExact(a) && PyString_CheckExact(b)) {
1036         if (Py_SIZE(a) == 0) {
1037             Py_INCREF(bb);
1038             return bb;
1039         }
1040         Py_INCREF(a);
1041         return (PyObject *)a;
1042     }
1043     /* Check that string sizes are not negative, to prevent an
1044        overflow in cases where we are passed incorrectly-created
1045        strings with negative lengths (due to a bug in other code).
1046     */
1047     if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1048         Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1049         PyErr_SetString(PyExc_OverflowError,
1050                         "strings are too large to concat");
1051         return NULL;
1052     }
1053     size = Py_SIZE(a) + Py_SIZE(b);
1054 
1055     /* Inline PyObject_NewVar */
1056     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1057         PyErr_SetString(PyExc_OverflowError,
1058                         "strings are too large to concat");
1059         return NULL;
1060     }
1061     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1062     if (op == NULL)
1063         return PyErr_NoMemory();
1064     (void)PyObject_INIT_VAR(op, &PyString_Type, size);
1065     op->ob_shash = -1;
1066     op->ob_sstate = SSTATE_NOT_INTERNED;
1067     Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1068     Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1069     op->ob_sval[size] = '\0';
1070     return (PyObject *) op;
1071 #undef b
1072 }
1073 
1074 static PyObject *
string_repeat(register PyStringObject * a,register Py_ssize_t n)1075 string_repeat(register PyStringObject *a, register Py_ssize_t n)
1076 {
1077     register Py_ssize_t i;
1078     register Py_ssize_t j;
1079     register Py_ssize_t size;
1080     register PyStringObject *op;
1081     size_t nbytes;
1082     if (n < 0)
1083         n = 0;
1084     /* watch out for overflows:  the size can overflow Py_ssize_t,
1085      * and the # of bytes needed can overflow size_t
1086      */
1087     if (n && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1088         PyErr_SetString(PyExc_OverflowError,
1089             "repeated string is too long");
1090         return NULL;
1091     }
1092     size = Py_SIZE(a) * n;
1093     if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1094         Py_INCREF(a);
1095         return (PyObject *)a;
1096     }
1097     nbytes = (size_t)size;
1098     if (nbytes + PyStringObject_SIZE <= nbytes) {
1099         PyErr_SetString(PyExc_OverflowError,
1100             "repeated string is too long");
1101         return NULL;
1102     }
1103     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1104     if (op == NULL)
1105         return PyErr_NoMemory();
1106     (void)PyObject_INIT_VAR(op, &PyString_Type, size);
1107     op->ob_shash = -1;
1108     op->ob_sstate = SSTATE_NOT_INTERNED;
1109     op->ob_sval[size] = '\0';
1110     if (Py_SIZE(a) == 1 && n > 0) {
1111         memset(op->ob_sval, a->ob_sval[0] , n);
1112         return (PyObject *) op;
1113     }
1114     i = 0;
1115     if (i < size) {
1116         Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1117         i = Py_SIZE(a);
1118     }
1119     while (i < size) {
1120         j = (i <= size-i)  ?  i  :  size-i;
1121         Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1122         i += j;
1123     }
1124     return (PyObject *) op;
1125 }
1126 
1127 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1128 
1129 static PyObject *
string_slice(register PyStringObject * a,register Py_ssize_t i,register Py_ssize_t j)1130 string_slice(register PyStringObject *a, register Py_ssize_t i,
1131              register Py_ssize_t j)
1132      /* j -- may be negative! */
1133 {
1134     if (i < 0)
1135         i = 0;
1136     if (j < 0)
1137         j = 0; /* Avoid signed/unsigned bug in next line */
1138     if (j > Py_SIZE(a))
1139         j = Py_SIZE(a);
1140     if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1141         /* It's the same as a */
1142         Py_INCREF(a);
1143         return (PyObject *)a;
1144     }
1145     if (j < i)
1146         j = i;
1147     return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1148 }
1149 
1150 static int
string_contains(PyObject * str_obj,PyObject * sub_obj)1151 string_contains(PyObject *str_obj, PyObject *sub_obj)
1152 {
1153     if (!PyString_CheckExact(sub_obj)) {
1154 #ifdef Py_USING_UNICODE
1155         if (PyUnicode_Check(sub_obj))
1156             return PyUnicode_Contains(str_obj, sub_obj);
1157 #endif
1158         if (!PyString_Check(sub_obj)) {
1159             PyErr_Format(PyExc_TypeError,
1160                 "'in <string>' requires string as left operand, "
1161                 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1162             return -1;
1163         }
1164     }
1165 
1166     return stringlib_contains_obj(str_obj, sub_obj);
1167 }
1168 
1169 static PyObject *
string_item(PyStringObject * a,register Py_ssize_t i)1170 string_item(PyStringObject *a, register Py_ssize_t i)
1171 {
1172     char pchar;
1173     PyObject *v;
1174     if (i < 0 || i >= Py_SIZE(a)) {
1175         PyErr_SetString(PyExc_IndexError, "string index out of range");
1176         return NULL;
1177     }
1178     pchar = a->ob_sval[i];
1179     v = (PyObject *)characters[pchar & UCHAR_MAX];
1180     if (v == NULL)
1181         v = PyString_FromStringAndSize(&pchar, 1);
1182     else {
1183 #ifdef COUNT_ALLOCS
1184         one_strings++;
1185 #endif
1186         Py_INCREF(v);
1187     }
1188     return v;
1189 }
1190 
1191 static PyObject*
string_richcompare(PyStringObject * a,PyStringObject * b,int op)1192 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1193 {
1194     int c;
1195     Py_ssize_t len_a, len_b;
1196     Py_ssize_t min_len;
1197     PyObject *result;
1198 
1199     /* Make sure both arguments are strings. */
1200     if (!(PyString_Check(a) && PyString_Check(b))) {
1201         result = Py_NotImplemented;
1202         goto out;
1203     }
1204     if (a == b) {
1205         switch (op) {
1206         case Py_EQ:case Py_LE:case Py_GE:
1207             result = Py_True;
1208             goto out;
1209         case Py_NE:case Py_LT:case Py_GT:
1210             result = Py_False;
1211             goto out;
1212         }
1213     }
1214     if (op == Py_EQ) {
1215         /* Supporting Py_NE here as well does not save
1216            much time, since Py_NE is rarely used.  */
1217         if (Py_SIZE(a) == Py_SIZE(b)
1218             && (a->ob_sval[0] == b->ob_sval[0]
1219             && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1220             result = Py_True;
1221         } else {
1222             result = Py_False;
1223         }
1224         goto out;
1225     }
1226     len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1227     min_len = (len_a < len_b) ? len_a : len_b;
1228     if (min_len > 0) {
1229         c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1230         if (c==0)
1231             c = memcmp(a->ob_sval, b->ob_sval, min_len);
1232     } else
1233         c = 0;
1234     if (c == 0)
1235         c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1236     switch (op) {
1237     case Py_LT: c = c <  0; break;
1238     case Py_LE: c = c <= 0; break;
1239     case Py_EQ: assert(0);  break; /* unreachable */
1240     case Py_NE: c = c != 0; break;
1241     case Py_GT: c = c >  0; break;
1242     case Py_GE: c = c >= 0; break;
1243     default:
1244         result = Py_NotImplemented;
1245         goto out;
1246     }
1247     result = c ? Py_True : Py_False;
1248   out:
1249     Py_INCREF(result);
1250     return result;
1251 }
1252 
1253 int
_PyString_Eq(PyObject * o1,PyObject * o2)1254 _PyString_Eq(PyObject *o1, PyObject *o2)
1255 {
1256     PyStringObject *a = (PyStringObject*) o1;
1257     PyStringObject *b = (PyStringObject*) o2;
1258     return Py_SIZE(a) == Py_SIZE(b)
1259       && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1260 }
1261 
1262 static long
string_hash(PyStringObject * a)1263 string_hash(PyStringObject *a)
1264 {
1265     register Py_ssize_t len;
1266     register unsigned char *p;
1267     register long x;
1268 
1269 #ifdef Py_DEBUG
1270     assert(_Py_HashSecret_Initialized);
1271 #endif
1272     if (a->ob_shash != -1)
1273         return a->ob_shash;
1274     len = Py_SIZE(a);
1275     /*
1276       We make the hash of the empty string be 0, rather than using
1277       (prefix ^ suffix), since this slightly obfuscates the hash secret
1278     */
1279     if (len == 0) {
1280         a->ob_shash = 0;
1281         return 0;
1282     }
1283     p = (unsigned char *) a->ob_sval;
1284     x = _Py_HashSecret.prefix;
1285     x ^= *p << 7;
1286     while (--len >= 0)
1287         x = (1000003*x) ^ *p++;
1288     x ^= Py_SIZE(a);
1289     x ^= _Py_HashSecret.suffix;
1290     if (x == -1)
1291         x = -2;
1292     a->ob_shash = x;
1293     return x;
1294 }
1295 
1296 static PyObject*
string_subscript(PyStringObject * self,PyObject * item)1297 string_subscript(PyStringObject* self, PyObject* item)
1298 {
1299     if (PyIndex_Check(item)) {
1300         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1301         if (i == -1 && PyErr_Occurred())
1302             return NULL;
1303         if (i < 0)
1304             i += PyString_GET_SIZE(self);
1305         return string_item(self, i);
1306     }
1307     else if (PySlice_Check(item)) {
1308         Py_ssize_t start, stop, step, slicelength, cur, i;
1309         char* source_buf;
1310         char* result_buf;
1311         PyObject* result;
1312 
1313         if (PySlice_GetIndicesEx((PySliceObject*)item,
1314                          PyString_GET_SIZE(self),
1315                          &start, &stop, &step, &slicelength) < 0) {
1316             return NULL;
1317         }
1318 
1319         if (slicelength <= 0) {
1320             return PyString_FromStringAndSize("", 0);
1321         }
1322         else if (start == 0 && step == 1 &&
1323                  slicelength == PyString_GET_SIZE(self) &&
1324                  PyString_CheckExact(self)) {
1325             Py_INCREF(self);
1326             return (PyObject *)self;
1327         }
1328         else if (step == 1) {
1329             return PyString_FromStringAndSize(
1330                 PyString_AS_STRING(self) + start,
1331                 slicelength);
1332         }
1333         else {
1334             source_buf = PyString_AsString((PyObject*)self);
1335             result_buf = (char *)PyMem_Malloc(slicelength);
1336             if (result_buf == NULL)
1337                 return PyErr_NoMemory();
1338 
1339             for (cur = start, i = 0; i < slicelength;
1340                  cur += step, i++) {
1341                 result_buf[i] = source_buf[cur];
1342             }
1343 
1344             result = PyString_FromStringAndSize(result_buf,
1345                                                 slicelength);
1346             PyMem_Free(result_buf);
1347             return result;
1348         }
1349     }
1350     else {
1351         PyErr_Format(PyExc_TypeError,
1352                      "string indices must be integers, not %.200s",
1353                      Py_TYPE(item)->tp_name);
1354         return NULL;
1355     }
1356 }
1357 
1358 static Py_ssize_t
string_buffer_getreadbuf(PyStringObject * self,Py_ssize_t index,const void ** ptr)1359 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1360 {
1361     if ( index != 0 ) {
1362         PyErr_SetString(PyExc_SystemError,
1363                         "accessing non-existent string segment");
1364         return -1;
1365     }
1366     *ptr = (void *)self->ob_sval;
1367     return Py_SIZE(self);
1368 }
1369 
1370 static Py_ssize_t
string_buffer_getwritebuf(PyStringObject * self,Py_ssize_t index,const void ** ptr)1371 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1372 {
1373     PyErr_SetString(PyExc_TypeError,
1374                     "Cannot use string as modifiable buffer");
1375     return -1;
1376 }
1377 
1378 static Py_ssize_t
string_buffer_getsegcount(PyStringObject * self,Py_ssize_t * lenp)1379 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1380 {
1381     if ( lenp )
1382         *lenp = Py_SIZE(self);
1383     return 1;
1384 }
1385 
1386 static Py_ssize_t
string_buffer_getcharbuf(PyStringObject * self,Py_ssize_t index,const char ** ptr)1387 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1388 {
1389     if ( index != 0 ) {
1390         PyErr_SetString(PyExc_SystemError,
1391                         "accessing non-existent string segment");
1392         return -1;
1393     }
1394     *ptr = self->ob_sval;
1395     return Py_SIZE(self);
1396 }
1397 
1398 static int
string_buffer_getbuffer(PyStringObject * self,Py_buffer * view,int flags)1399 string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1400 {
1401     return PyBuffer_FillInfo(view, (PyObject*)self,
1402                              (void *)self->ob_sval, Py_SIZE(self),
1403                              1, flags);
1404 }
1405 
1406 static PySequenceMethods string_as_sequence = {
1407     (lenfunc)string_length, /*sq_length*/
1408     (binaryfunc)string_concat, /*sq_concat*/
1409     (ssizeargfunc)string_repeat, /*sq_repeat*/
1410     (ssizeargfunc)string_item, /*sq_item*/
1411     (ssizessizeargfunc)string_slice, /*sq_slice*/
1412     0,                  /*sq_ass_item*/
1413     0,                  /*sq_ass_slice*/
1414     (objobjproc)string_contains /*sq_contains*/
1415 };
1416 
1417 static PyMappingMethods string_as_mapping = {
1418     (lenfunc)string_length,
1419     (binaryfunc)string_subscript,
1420     0,
1421 };
1422 
1423 static PyBufferProcs string_as_buffer = {
1424     (readbufferproc)string_buffer_getreadbuf,
1425     (writebufferproc)string_buffer_getwritebuf,
1426     (segcountproc)string_buffer_getsegcount,
1427     (charbufferproc)string_buffer_getcharbuf,
1428     (getbufferproc)string_buffer_getbuffer,
1429     0, /* XXX */
1430 };
1431 
1432 
1433 
/* Strip modes.  Each value doubles as an index into stripformat[]. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2

/* Argument format strings, one per strip mode, indexed by the values
   above. */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for strip mode i: the format string with its three-byte
   "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[(i)] + 3)
1442 
1443 PyDoc_STRVAR(split__doc__,
1444 "S.split([sep [,maxsplit]]) -> list of strings\n\
1445 \n\
1446 Return a list of the words in the string S, using sep as the\n\
1447 delimiter string.  If maxsplit is given, at most maxsplit\n\
1448 splits are done. If sep is not specified or is None, any\n\
1449 whitespace string is a separator and empty strings are removed\n\
1450 from the result.");
1451 
1452 static PyObject *
string_split(PyStringObject * self,PyObject * args)1453 string_split(PyStringObject *self, PyObject *args)
1454 {
1455     Py_ssize_t len = PyString_GET_SIZE(self), n;
1456     Py_ssize_t maxsplit = -1;
1457     const char *s = PyString_AS_STRING(self), *sub;
1458     PyObject *subobj = Py_None;
1459 
1460     if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1461         return NULL;
1462     if (maxsplit < 0)
1463         maxsplit = PY_SSIZE_T_MAX;
1464     if (subobj == Py_None)
1465         return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1466     if (PyString_Check(subobj)) {
1467         sub = PyString_AS_STRING(subobj);
1468         n = PyString_GET_SIZE(subobj);
1469     }
1470 #ifdef Py_USING_UNICODE
1471     else if (PyUnicode_Check(subobj))
1472         return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1473 #endif
1474     else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1475         return NULL;
1476 
1477     return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1478 }
1479 
1480 PyDoc_STRVAR(partition__doc__,
1481 "S.partition(sep) -> (head, sep, tail)\n\
1482 \n\
1483 Search for the separator sep in S, and return the part before it,\n\
1484 the separator itself, and the part after it.  If the separator is not\n\
1485 found, return S and two empty strings.");
1486 
1487 static PyObject *
string_partition(PyStringObject * self,PyObject * sep_obj)1488 string_partition(PyStringObject *self, PyObject *sep_obj)
1489 {
1490     const char *sep;
1491     Py_ssize_t sep_len;
1492 
1493     if (PyString_Check(sep_obj)) {
1494         sep = PyString_AS_STRING(sep_obj);
1495         sep_len = PyString_GET_SIZE(sep_obj);
1496     }
1497 #ifdef Py_USING_UNICODE
1498     else if (PyUnicode_Check(sep_obj))
1499         return PyUnicode_Partition((PyObject *) self, sep_obj);
1500 #endif
1501     else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1502         return NULL;
1503 
1504     return stringlib_partition(
1505         (PyObject*) self,
1506         PyString_AS_STRING(self), PyString_GET_SIZE(self),
1507         sep_obj, sep, sep_len
1508         );
1509 }
1510 
1511 PyDoc_STRVAR(rpartition__doc__,
1512 "S.rpartition(sep) -> (head, sep, tail)\n\
1513 \n\
1514 Search for the separator sep in S, starting at the end of S, and return\n\
1515 the part before it, the separator itself, and the part after it.  If the\n\
1516 separator is not found, return two empty strings and S.");
1517 
1518 static PyObject *
string_rpartition(PyStringObject * self,PyObject * sep_obj)1519 string_rpartition(PyStringObject *self, PyObject *sep_obj)
1520 {
1521     const char *sep;
1522     Py_ssize_t sep_len;
1523 
1524     if (PyString_Check(sep_obj)) {
1525         sep = PyString_AS_STRING(sep_obj);
1526         sep_len = PyString_GET_SIZE(sep_obj);
1527     }
1528 #ifdef Py_USING_UNICODE
1529     else if (PyUnicode_Check(sep_obj))
1530         return PyUnicode_RPartition((PyObject *) self, sep_obj);
1531 #endif
1532     else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1533         return NULL;
1534 
1535     return stringlib_rpartition(
1536         (PyObject*) self,
1537         PyString_AS_STRING(self), PyString_GET_SIZE(self),
1538         sep_obj, sep, sep_len
1539         );
1540 }
1541 
1542 PyDoc_STRVAR(rsplit__doc__,
1543 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1544 \n\
1545 Return a list of the words in the string S, using sep as the\n\
1546 delimiter string, starting at the end of the string and working\n\
1547 to the front.  If maxsplit is given, at most maxsplit splits are\n\
1548 done. If sep is not specified or is None, any whitespace string\n\
1549 is a separator.");
1550 
1551 static PyObject *
string_rsplit(PyStringObject * self,PyObject * args)1552 string_rsplit(PyStringObject *self, PyObject *args)
1553 {
1554     Py_ssize_t len = PyString_GET_SIZE(self), n;
1555     Py_ssize_t maxsplit = -1;
1556     const char *s = PyString_AS_STRING(self), *sub;
1557     PyObject *subobj = Py_None;
1558 
1559     if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1560         return NULL;
1561     if (maxsplit < 0)
1562         maxsplit = PY_SSIZE_T_MAX;
1563     if (subobj == Py_None)
1564         return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1565     if (PyString_Check(subobj)) {
1566         sub = PyString_AS_STRING(subobj);
1567         n = PyString_GET_SIZE(subobj);
1568     }
1569 #ifdef Py_USING_UNICODE
1570     else if (PyUnicode_Check(subobj))
1571         return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1572 #endif
1573     else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1574         return NULL;
1575 
1576     return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1577 }
1578 
1579 
1580 PyDoc_STRVAR(join__doc__,
1581 "S.join(iterable) -> string\n\
1582 \n\
1583 Return a string which is the concatenation of the strings in the\n\
1584 iterable.  The separator between elements is S.");
1585 
1586 static PyObject *
string_join(PyStringObject * self,PyObject * orig)1587 string_join(PyStringObject *self, PyObject *orig)
1588 {
1589     char *sep = PyString_AS_STRING(self);
1590     const Py_ssize_t seplen = PyString_GET_SIZE(self);
1591     PyObject *res = NULL;
1592     char *p;
1593     Py_ssize_t seqlen = 0;
1594     size_t sz = 0;
1595     Py_ssize_t i;
1596     PyObject *seq, *item;
1597 
1598     seq = PySequence_Fast(orig, "can only join an iterable");
1599     if (seq == NULL) {
1600         return NULL;
1601     }
1602 
1603     seqlen = PySequence_Size(seq);
1604     if (seqlen == 0) {
1605         Py_DECREF(seq);
1606         return PyString_FromString("");
1607     }
1608     if (seqlen == 1) {
1609         item = PySequence_Fast_GET_ITEM(seq, 0);
1610         if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1611             Py_INCREF(item);
1612             Py_DECREF(seq);
1613             return item;
1614         }
1615     }
1616 
1617     /* There are at least two things to join, or else we have a subclass
1618      * of the builtin types in the sequence.
1619      * Do a pre-pass to figure out the total amount of space we'll
1620      * need (sz), see whether any argument is absurd, and defer to
1621      * the Unicode join if appropriate.
1622      */
1623     for (i = 0; i < seqlen; i++) {
1624         const size_t old_sz = sz;
1625         item = PySequence_Fast_GET_ITEM(seq, i);
1626         if (!PyString_Check(item)){
1627 #ifdef Py_USING_UNICODE
1628             if (PyUnicode_Check(item)) {
1629                 /* Defer to Unicode join.
1630                  * CAUTION:  There's no guarantee that the
1631                  * original sequence can be iterated over
1632                  * again, so we must pass seq here.
1633                  */
1634                 PyObject *result;
1635                 result = PyUnicode_Join((PyObject *)self, seq);
1636                 Py_DECREF(seq);
1637                 return result;
1638             }
1639 #endif
1640             PyErr_Format(PyExc_TypeError,
1641                          "sequence item %zd: expected string,"
1642                          " %.80s found",
1643                          i, Py_TYPE(item)->tp_name);
1644             Py_DECREF(seq);
1645             return NULL;
1646         }
1647         sz += PyString_GET_SIZE(item);
1648         if (i != 0)
1649             sz += seplen;
1650         if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1651             PyErr_SetString(PyExc_OverflowError,
1652                 "join() result is too long for a Python string");
1653             Py_DECREF(seq);
1654             return NULL;
1655         }
1656     }
1657 
1658     /* Allocate result space. */
1659     res = PyString_FromStringAndSize((char*)NULL, sz);
1660     if (res == NULL) {
1661         Py_DECREF(seq);
1662         return NULL;
1663     }
1664 
1665     /* Catenate everything. */
1666     p = PyString_AS_STRING(res);
1667     for (i = 0; i < seqlen; ++i) {
1668         size_t n;
1669         item = PySequence_Fast_GET_ITEM(seq, i);
1670         n = PyString_GET_SIZE(item);
1671         Py_MEMCPY(p, PyString_AS_STRING(item), n);
1672         p += n;
1673         if (i < seqlen - 1) {
1674             Py_MEMCPY(p, sep, seplen);
1675             p += seplen;
1676         }
1677     }
1678 
1679     Py_DECREF(seq);
1680     return res;
1681 }
1682 
1683 PyObject *
_PyString_Join(PyObject * sep,PyObject * x)1684 _PyString_Join(PyObject *sep, PyObject *x)
1685 {
1686     assert(sep != NULL && PyString_Check(sep));
1687     assert(x != NULL);
1688     return string_join((PyStringObject *)sep, x);
1689 }
1690 
/* Helper macro that normalises start/end slice values against len:
   end is capped at len, negative values are offset by len, and anything
   still negative becomes 0.  start and end are assigned to, so they must
   be lvalues.  The expansion is two consecutive if statements with no
   enclosing block, so it must not be used as the unbraced body of
   another statement. */
#define ADJUST_INDICES(start, end, len)         \
    if ((end) > (len))                          \
        (end) = (len);                          \
    else if ((end) < 0) {                       \
        (end) += (len);                         \
        if ((end) < 0)                          \
            (end) = 0;                          \
    }                                           \
    if ((start) < 0) {                          \
        (start) += (len);                       \
        if ((start) < 0)                        \
            (start) = 0;                        \
    }
1705 
1706 Py_LOCAL_INLINE(Py_ssize_t)
string_find_internal(PyStringObject * self,PyObject * args,int dir)1707 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1708 {
1709     PyObject *subobj;
1710     const char *sub;
1711     Py_ssize_t sub_len;
1712     Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1713 
1714     if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1715                                     args, &subobj, &start, &end))
1716         return -2;
1717 
1718     if (PyString_Check(subobj)) {
1719         sub = PyString_AS_STRING(subobj);
1720         sub_len = PyString_GET_SIZE(subobj);
1721     }
1722 #ifdef Py_USING_UNICODE
1723     else if (PyUnicode_Check(subobj))
1724         return PyUnicode_Find(
1725             (PyObject *)self, subobj, start, end, dir);
1726 #endif
1727     else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1728         /* XXX - the "expected a character buffer object" is pretty
1729            confusing for a non-expert.  remap to something else ? */
1730         return -2;
1731 
1732     if (dir > 0)
1733         return stringlib_find_slice(
1734             PyString_AS_STRING(self), PyString_GET_SIZE(self),
1735             sub, sub_len, start, end);
1736     else
1737         return stringlib_rfind_slice(
1738             PyString_AS_STRING(self), PyString_GET_SIZE(self),
1739             sub, sub_len, start, end);
1740 }
1741 
1742 
1743 PyDoc_STRVAR(find__doc__,
1744 "S.find(sub [,start [,end]]) -> int\n\
1745 \n\
1746 Return the lowest index in S where substring sub is found,\n\
1747 such that sub is contained within S[start:end].  Optional\n\
1748 arguments start and end are interpreted as in slice notation.\n\
1749 \n\
1750 Return -1 on failure.");
1751 
1752 static PyObject *
string_find(PyStringObject * self,PyObject * args)1753 string_find(PyStringObject *self, PyObject *args)
1754 {
1755     Py_ssize_t result = string_find_internal(self, args, +1);
1756     if (result == -2)
1757         return NULL;
1758     return PyInt_FromSsize_t(result);
1759 }
1760 
1761 
1762 PyDoc_STRVAR(index__doc__,
1763 "S.index(sub [,start [,end]]) -> int\n\
1764 \n\
1765 Like S.find() but raise ValueError when the substring is not found.");
1766 
1767 static PyObject *
string_index(PyStringObject * self,PyObject * args)1768 string_index(PyStringObject *self, PyObject *args)
1769 {
1770     Py_ssize_t result = string_find_internal(self, args, +1);
1771     if (result == -2)
1772         return NULL;
1773     if (result == -1) {
1774         PyErr_SetString(PyExc_ValueError,
1775                         "substring not found");
1776         return NULL;
1777     }
1778     return PyInt_FromSsize_t(result);
1779 }
1780 
1781 
1782 PyDoc_STRVAR(rfind__doc__,
1783 "S.rfind(sub [,start [,end]]) -> int\n\
1784 \n\
1785 Return the highest index in S where substring sub is found,\n\
1786 such that sub is contained within S[start:end].  Optional\n\
1787 arguments start and end are interpreted as in slice notation.\n\
1788 \n\
1789 Return -1 on failure.");
1790 
1791 static PyObject *
string_rfind(PyStringObject * self,PyObject * args)1792 string_rfind(PyStringObject *self, PyObject *args)
1793 {
1794     Py_ssize_t result = string_find_internal(self, args, -1);
1795     if (result == -2)
1796         return NULL;
1797     return PyInt_FromSsize_t(result);
1798 }
1799 
1800 
1801 PyDoc_STRVAR(rindex__doc__,
1802 "S.rindex(sub [,start [,end]]) -> int\n\
1803 \n\
1804 Like S.rfind() but raise ValueError when the substring is not found.");
1805 
1806 static PyObject *
string_rindex(PyStringObject * self,PyObject * args)1807 string_rindex(PyStringObject *self, PyObject *args)
1808 {
1809     Py_ssize_t result = string_find_internal(self, args, -1);
1810     if (result == -2)
1811         return NULL;
1812     if (result == -1) {
1813         PyErr_SetString(PyExc_ValueError,
1814                         "substring not found");
1815         return NULL;
1816     }
1817     return PyInt_FromSsize_t(result);
1818 }
1819 
1820 
1821 Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyStringObject * self,int striptype,PyObject * sepobj)1822 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1823 {
1824     char *s = PyString_AS_STRING(self);
1825     Py_ssize_t len = PyString_GET_SIZE(self);
1826     char *sep = PyString_AS_STRING(sepobj);
1827     Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1828     Py_ssize_t i, j;
1829 
1830     i = 0;
1831     if (striptype != RIGHTSTRIP) {
1832         while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1833             i++;
1834         }
1835     }
1836 
1837     j = len;
1838     if (striptype != LEFTSTRIP) {
1839         do {
1840             j--;
1841         } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1842         j++;
1843     }
1844 
1845     if (i == 0 && j == len && PyString_CheckExact(self)) {
1846         Py_INCREF(self);
1847         return (PyObject*)self;
1848     }
1849     else
1850         return PyString_FromStringAndSize(s+i, j-i);
1851 }
1852 
1853 
1854 Py_LOCAL_INLINE(PyObject *)
do_strip(PyStringObject * self,int striptype)1855 do_strip(PyStringObject *self, int striptype)
1856 {
1857     char *s = PyString_AS_STRING(self);
1858     Py_ssize_t len = PyString_GET_SIZE(self), i, j;
1859 
1860     i = 0;
1861     if (striptype != RIGHTSTRIP) {
1862         while (i < len && isspace(Py_CHARMASK(s[i]))) {
1863             i++;
1864         }
1865     }
1866 
1867     j = len;
1868     if (striptype != LEFTSTRIP) {
1869         do {
1870             j--;
1871         } while (j >= i && isspace(Py_CHARMASK(s[j])));
1872         j++;
1873     }
1874 
1875     if (i == 0 && j == len && PyString_CheckExact(self)) {
1876         Py_INCREF(self);
1877         return (PyObject*)self;
1878     }
1879     else
1880         return PyString_FromStringAndSize(s+i, j-i);
1881 }
1882 
1883 
1884 Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyStringObject * self,int striptype,PyObject * args)1885 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1886 {
1887     PyObject *sep = NULL;
1888 
1889     if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1890         return NULL;
1891 
1892     if (sep != NULL && sep != Py_None) {
1893         if (PyString_Check(sep))
1894             return do_xstrip(self, striptype, sep);
1895 #ifdef Py_USING_UNICODE
1896         else if (PyUnicode_Check(sep)) {
1897             PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1898             PyObject *res;
1899             if (uniself==NULL)
1900                 return NULL;
1901             res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1902                 striptype, sep);
1903             Py_DECREF(uniself);
1904             return res;
1905         }
1906 #endif
1907         PyErr_Format(PyExc_TypeError,
1908 #ifdef Py_USING_UNICODE
1909                      "%s arg must be None, str or unicode",
1910 #else
1911                      "%s arg must be None or str",
1912 #endif
1913                      STRIPNAME(striptype));
1914         return NULL;
1915     }
1916 
1917     return do_strip(self, striptype);
1918 }
1919 
1920 
1921 PyDoc_STRVAR(strip__doc__,
1922 "S.strip([chars]) -> string or unicode\n\
1923 \n\
1924 Return a copy of the string S with leading and trailing\n\
1925 whitespace removed.\n\
1926 If chars is given and not None, remove characters in chars instead.\n\
1927 If chars is unicode, S will be converted to unicode before stripping");
1928 
1929 static PyObject *
string_strip(PyStringObject * self,PyObject * args)1930 string_strip(PyStringObject *self, PyObject *args)
1931 {
1932     if (PyTuple_GET_SIZE(args) == 0)
1933         return do_strip(self, BOTHSTRIP); /* Common case */
1934     else
1935         return do_argstrip(self, BOTHSTRIP, args);
1936 }
1937 
1938 
1939 PyDoc_STRVAR(lstrip__doc__,
1940 "S.lstrip([chars]) -> string or unicode\n\
1941 \n\
1942 Return a copy of the string S with leading whitespace removed.\n\
1943 If chars is given and not None, remove characters in chars instead.\n\
1944 If chars is unicode, S will be converted to unicode before stripping");
1945 
1946 static PyObject *
string_lstrip(PyStringObject * self,PyObject * args)1947 string_lstrip(PyStringObject *self, PyObject *args)
1948 {
1949     if (PyTuple_GET_SIZE(args) == 0)
1950         return do_strip(self, LEFTSTRIP); /* Common case */
1951     else
1952         return do_argstrip(self, LEFTSTRIP, args);
1953 }
1954 
1955 
1956 PyDoc_STRVAR(rstrip__doc__,
1957 "S.rstrip([chars]) -> string or unicode\n\
1958 \n\
1959 Return a copy of the string S with trailing whitespace removed.\n\
1960 If chars is given and not None, remove characters in chars instead.\n\
1961 If chars is unicode, S will be converted to unicode before stripping");
1962 
1963 static PyObject *
string_rstrip(PyStringObject * self,PyObject * args)1964 string_rstrip(PyStringObject *self, PyObject *args)
1965 {
1966     if (PyTuple_GET_SIZE(args) == 0)
1967         return do_strip(self, RIGHTSTRIP); /* Common case */
1968     else
1969         return do_argstrip(self, RIGHTSTRIP, args);
1970 }
1971 
1972 
1973 PyDoc_STRVAR(lower__doc__,
1974 "S.lower() -> string\n\
1975 \n\
1976 Return a copy of the string S converted to lowercase.");
1977 
1978 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1979 #ifndef _tolower
1980 #define _tolower tolower
1981 #endif
1982 
1983 static PyObject *
string_lower(PyStringObject * self)1984 string_lower(PyStringObject *self)
1985 {
1986     char *s;
1987     Py_ssize_t i, n = PyString_GET_SIZE(self);
1988     PyObject *newobj;
1989 
1990     newobj = PyString_FromStringAndSize(NULL, n);
1991     if (!newobj)
1992         return NULL;
1993 
1994     s = PyString_AS_STRING(newobj);
1995 
1996     Py_MEMCPY(s, PyString_AS_STRING(self), n);
1997 
1998     for (i = 0; i < n; i++) {
1999         int c = Py_CHARMASK(s[i]);
2000         if (isupper(c))
2001             s[i] = _tolower(c);
2002     }
2003 
2004     return newobj;
2005 }
2006 
2007 PyDoc_STRVAR(upper__doc__,
2008 "S.upper() -> string\n\
2009 \n\
2010 Return a copy of the string S converted to uppercase.");
2011 
2012 #ifndef _toupper
2013 #define _toupper toupper
2014 #endif
2015 
2016 static PyObject *
string_upper(PyStringObject * self)2017 string_upper(PyStringObject *self)
2018 {
2019     char *s;
2020     Py_ssize_t i, n = PyString_GET_SIZE(self);
2021     PyObject *newobj;
2022 
2023     newobj = PyString_FromStringAndSize(NULL, n);
2024     if (!newobj)
2025         return NULL;
2026 
2027     s = PyString_AS_STRING(newobj);
2028 
2029     Py_MEMCPY(s, PyString_AS_STRING(self), n);
2030 
2031     for (i = 0; i < n; i++) {
2032         int c = Py_CHARMASK(s[i]);
2033         if (islower(c))
2034             s[i] = _toupper(c);
2035     }
2036 
2037     return newobj;
2038 }
2039 
2040 PyDoc_STRVAR(title__doc__,
2041 "S.title() -> string\n\
2042 \n\
2043 Return a titlecased version of S, i.e. words start with uppercase\n\
2044 characters, all remaining cased characters have lowercase.");
2045 
2046 static PyObject*
string_title(PyStringObject * self)2047 string_title(PyStringObject *self)
2048 {
2049     char *s = PyString_AS_STRING(self), *s_new;
2050     Py_ssize_t i, n = PyString_GET_SIZE(self);
2051     int previous_is_cased = 0;
2052     PyObject *newobj;
2053 
2054     newobj = PyString_FromStringAndSize(NULL, n);
2055     if (newobj == NULL)
2056         return NULL;
2057     s_new = PyString_AsString(newobj);
2058     for (i = 0; i < n; i++) {
2059         int c = Py_CHARMASK(*s++);
2060         if (islower(c)) {
2061             if (!previous_is_cased)
2062                 c = toupper(c);
2063             previous_is_cased = 1;
2064         } else if (isupper(c)) {
2065             if (previous_is_cased)
2066                 c = tolower(c);
2067             previous_is_cased = 1;
2068         } else
2069             previous_is_cased = 0;
2070         *s_new++ = c;
2071     }
2072     return newobj;
2073 }
2074 
2075 PyDoc_STRVAR(capitalize__doc__,
2076 "S.capitalize() -> string\n\
2077 \n\
2078 Return a copy of the string S with only its first character\n\
2079 capitalized.");
2080 
2081 static PyObject *
string_capitalize(PyStringObject * self)2082 string_capitalize(PyStringObject *self)
2083 {
2084     char *s = PyString_AS_STRING(self), *s_new;
2085     Py_ssize_t i, n = PyString_GET_SIZE(self);
2086     PyObject *newobj;
2087 
2088     newobj = PyString_FromStringAndSize(NULL, n);
2089     if (newobj == NULL)
2090         return NULL;
2091     s_new = PyString_AsString(newobj);
2092     if (0 < n) {
2093         int c = Py_CHARMASK(*s++);
2094         if (islower(c))
2095             *s_new = toupper(c);
2096         else
2097             *s_new = c;
2098         s_new++;
2099     }
2100     for (i = 1; i < n; i++) {
2101         int c = Py_CHARMASK(*s++);
2102         if (isupper(c))
2103             *s_new = tolower(c);
2104         else
2105             *s_new = c;
2106         s_new++;
2107     }
2108     return newobj;
2109 }
2110 
2111 
2112 PyDoc_STRVAR(count__doc__,
2113 "S.count(sub[, start[, end]]) -> int\n\
2114 \n\
2115 Return the number of non-overlapping occurrences of substring sub in\n\
2116 string S[start:end].  Optional arguments start and end are interpreted\n\
2117 as in slice notation.");
2118 
2119 static PyObject *
string_count(PyStringObject * self,PyObject * args)2120 string_count(PyStringObject *self, PyObject *args)
2121 {
2122     PyObject *sub_obj;
2123     const char *str = PyString_AS_STRING(self), *sub;
2124     Py_ssize_t sub_len;
2125     Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2126 
2127     if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
2128         return NULL;
2129 
2130     if (PyString_Check(sub_obj)) {
2131         sub = PyString_AS_STRING(sub_obj);
2132         sub_len = PyString_GET_SIZE(sub_obj);
2133     }
2134 #ifdef Py_USING_UNICODE
2135     else if (PyUnicode_Check(sub_obj)) {
2136         Py_ssize_t count;
2137         count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2138         if (count == -1)
2139             return NULL;
2140         else
2141             return PyInt_FromSsize_t(count);
2142     }
2143 #endif
2144     else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2145         return NULL;
2146 
2147     ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
2148 
2149     return PyInt_FromSsize_t(
2150         stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2151         );
2152 }
2153 
2154 PyDoc_STRVAR(swapcase__doc__,
2155 "S.swapcase() -> string\n\
2156 \n\
2157 Return a copy of the string S with uppercase characters\n\
2158 converted to lowercase and vice versa.");
2159 
2160 static PyObject *
string_swapcase(PyStringObject * self)2161 string_swapcase(PyStringObject *self)
2162 {
2163     char *s = PyString_AS_STRING(self), *s_new;
2164     Py_ssize_t i, n = PyString_GET_SIZE(self);
2165     PyObject *newobj;
2166 
2167     newobj = PyString_FromStringAndSize(NULL, n);
2168     if (newobj == NULL)
2169         return NULL;
2170     s_new = PyString_AsString(newobj);
2171     for (i = 0; i < n; i++) {
2172         int c = Py_CHARMASK(*s++);
2173         if (islower(c)) {
2174             *s_new = toupper(c);
2175         }
2176         else if (isupper(c)) {
2177             *s_new = tolower(c);
2178         }
2179         else
2180             *s_new = c;
2181         s_new++;
2182     }
2183     return newobj;
2184 }
2185 
2186 
2187 PyDoc_STRVAR(translate__doc__,
2188 "S.translate(table [,deletechars]) -> string\n\
2189 \n\
2190 Return a copy of the string S, where all characters occurring\n\
2191 in the optional argument deletechars are removed, and the\n\
2192 remaining characters have been mapped through the given\n\
2193 translation table, which must be a string of length 256 or None.\n\
2194 If the table argument is None, no translation is applied and\n\
2195 the operation simply removes the characters in deletechars.");
2196 
2197 static PyObject *
string_translate(PyStringObject * self,PyObject * args)2198 string_translate(PyStringObject *self, PyObject *args)
2199 {
2200     register char *input, *output;
2201     const char *table;
2202     register Py_ssize_t i, c, changed = 0;
2203     PyObject *input_obj = (PyObject*)self;
2204     const char *output_start, *del_table=NULL;
2205     Py_ssize_t inlen, tablen, dellen = 0;
2206     PyObject *result;
2207     int trans_table[256];
2208     PyObject *tableobj, *delobj = NULL;
2209 
2210     if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2211                           &tableobj, &delobj))
2212         return NULL;
2213 
2214     if (PyString_Check(tableobj)) {
2215         table = PyString_AS_STRING(tableobj);
2216         tablen = PyString_GET_SIZE(tableobj);
2217     }
2218     else if (tableobj == Py_None) {
2219         table = NULL;
2220         tablen = 256;
2221     }
2222 #ifdef Py_USING_UNICODE
2223     else if (PyUnicode_Check(tableobj)) {
2224         /* Unicode .translate() does not support the deletechars
2225            parameter; instead a mapping to None will cause characters
2226            to be deleted. */
2227         if (delobj != NULL) {
2228             PyErr_SetString(PyExc_TypeError,
2229             "deletions are implemented differently for unicode");
2230             return NULL;
2231         }
2232         return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2233     }
2234 #endif
2235     else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2236         return NULL;
2237 
2238     if (tablen != 256) {
2239         PyErr_SetString(PyExc_ValueError,
2240           "translation table must be 256 characters long");
2241         return NULL;
2242     }
2243 
2244     if (delobj != NULL) {
2245         if (PyString_Check(delobj)) {
2246             del_table = PyString_AS_STRING(delobj);
2247             dellen = PyString_GET_SIZE(delobj);
2248         }
2249 #ifdef Py_USING_UNICODE
2250         else if (PyUnicode_Check(delobj)) {
2251             PyErr_SetString(PyExc_TypeError,
2252             "deletions are implemented differently for unicode");
2253             return NULL;
2254         }
2255 #endif
2256         else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2257             return NULL;
2258     }
2259     else {
2260         del_table = NULL;
2261         dellen = 0;
2262     }
2263 
2264     inlen = PyString_GET_SIZE(input_obj);
2265     result = PyString_FromStringAndSize((char *)NULL, inlen);
2266     if (result == NULL)
2267         return NULL;
2268     output_start = output = PyString_AsString(result);
2269     input = PyString_AS_STRING(input_obj);
2270 
2271     if (dellen == 0 && table != NULL) {
2272         /* If no deletions are required, use faster code */
2273         for (i = inlen; --i >= 0; ) {
2274             c = Py_CHARMASK(*input++);
2275             if (Py_CHARMASK((*output++ = table[c])) != c)
2276                 changed = 1;
2277         }
2278         if (changed || !PyString_CheckExact(input_obj))
2279             return result;
2280         Py_DECREF(result);
2281         Py_INCREF(input_obj);
2282         return input_obj;
2283     }
2284 
2285     if (table == NULL) {
2286         for (i = 0; i < 256; i++)
2287             trans_table[i] = Py_CHARMASK(i);
2288     } else {
2289         for (i = 0; i < 256; i++)
2290             trans_table[i] = Py_CHARMASK(table[i]);
2291     }
2292 
2293     for (i = 0; i < dellen; i++)
2294         trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2295 
2296     for (i = inlen; --i >= 0; ) {
2297         c = Py_CHARMASK(*input++);
2298         if (trans_table[c] != -1)
2299             if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2300                 continue;
2301         changed = 1;
2302     }
2303     if (!changed && PyString_CheckExact(input_obj)) {
2304         Py_DECREF(result);
2305         Py_INCREF(input_obj);
2306         return input_obj;
2307     }
2308     /* Fix the size of the resulting string */
2309     if (inlen > 0 && _PyString_Resize(&result, output - output_start))
2310         return NULL;
2311     return result;
2312 }
2313 
2314 
2315 /* find and count characters and substrings */
2316 
/* Pointer (as char *) to the first byte equal to `c` within the
   `target_len` bytes starting at `target`, or NULL when there is none. */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2319 
2320 /* String ops must return a string.  */
2321 /* If the object is subclass of string, create a copy */
2322 Py_LOCAL(PyStringObject *)
return_self(PyStringObject * self)2323 return_self(PyStringObject *self)
2324 {
2325     if (PyString_CheckExact(self)) {
2326         Py_INCREF(self);
2327         return self;
2328     }
2329     return (PyStringObject *)PyString_FromStringAndSize(
2330         PyString_AS_STRING(self),
2331         PyString_GET_SIZE(self));
2332 }
2333 
2334 Py_LOCAL_INLINE(Py_ssize_t)
countchar(const char * target,Py_ssize_t target_len,char c,Py_ssize_t maxcount)2335 countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
2336 {
2337     Py_ssize_t count=0;
2338     const char *start=target;
2339     const char *end=target+target_len;
2340 
2341     while ( (start=findchar(start, end-start, c)) != NULL ) {
2342         count++;
2343         if (count >= maxcount)
2344             break;
2345         start += 1;
2346     }
2347     return count;
2348 }
2349 
2350 
2351 /* Algorithms for different cases of string replacement */
2352 
2353 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2354 Py_LOCAL(PyStringObject *)
replace_interleave(PyStringObject * self,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2355 replace_interleave(PyStringObject *self,
2356                    const char *to_s, Py_ssize_t to_len,
2357                    Py_ssize_t maxcount)
2358 {
2359     char *self_s, *result_s;
2360     Py_ssize_t self_len, result_len;
2361     Py_ssize_t count, i, product;
2362     PyStringObject *result;
2363 
2364     self_len = PyString_GET_SIZE(self);
2365 
2366     /* 1 at the end plus 1 after every character */
2367     count = self_len+1;
2368     if (maxcount < count)
2369         count = maxcount;
2370 
2371     /* Check for overflow */
2372     /*   result_len = count * to_len + self_len; */
2373     product = count * to_len;
2374     if (product / to_len != count) {
2375         PyErr_SetString(PyExc_OverflowError,
2376                         "replace string is too long");
2377         return NULL;
2378     }
2379     result_len = product + self_len;
2380     if (result_len < 0) {
2381         PyErr_SetString(PyExc_OverflowError,
2382                         "replace string is too long");
2383         return NULL;
2384     }
2385 
2386     if (! (result = (PyStringObject *)
2387                      PyString_FromStringAndSize(NULL, result_len)) )
2388         return NULL;
2389 
2390     self_s = PyString_AS_STRING(self);
2391     result_s = PyString_AS_STRING(result);
2392 
2393     /* TODO: special case single character, which doesn't need memcpy */
2394 
2395     /* Lay the first one down (guaranteed this will occur) */
2396     Py_MEMCPY(result_s, to_s, to_len);
2397     result_s += to_len;
2398     count -= 1;
2399 
2400     for (i=0; i<count; i++) {
2401         *result_s++ = *self_s++;
2402         Py_MEMCPY(result_s, to_s, to_len);
2403         result_s += to_len;
2404     }
2405 
2406     /* Copy the rest of the original string */
2407     Py_MEMCPY(result_s, self_s, self_len-i);
2408 
2409     return result;
2410 }
2411 
2412 /* Special case for deleting a single character */
2413 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2414 Py_LOCAL(PyStringObject *)
replace_delete_single_character(PyStringObject * self,char from_c,Py_ssize_t maxcount)2415 replace_delete_single_character(PyStringObject *self,
2416                                 char from_c, Py_ssize_t maxcount)
2417 {
2418     char *self_s, *result_s;
2419     char *start, *next, *end;
2420     Py_ssize_t self_len, result_len;
2421     Py_ssize_t count;
2422     PyStringObject *result;
2423 
2424     self_len = PyString_GET_SIZE(self);
2425     self_s = PyString_AS_STRING(self);
2426 
2427     count = countchar(self_s, self_len, from_c, maxcount);
2428     if (count == 0) {
2429         return return_self(self);
2430     }
2431 
2432     result_len = self_len - count;  /* from_len == 1 */
2433     assert(result_len>=0);
2434 
2435     if ( (result = (PyStringObject *)
2436                     PyString_FromStringAndSize(NULL, result_len)) == NULL)
2437         return NULL;
2438     result_s = PyString_AS_STRING(result);
2439 
2440     start = self_s;
2441     end = self_s + self_len;
2442     while (count-- > 0) {
2443         next = findchar(start, end-start, from_c);
2444         if (next == NULL)
2445             break;
2446         Py_MEMCPY(result_s, start, next-start);
2447         result_s += (next-start);
2448         start = next+1;
2449     }
2450     Py_MEMCPY(result_s, start, end-start);
2451 
2452     return result;
2453 }
2454 
2455 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2456 
2457 Py_LOCAL(PyStringObject *)
replace_delete_substring(PyStringObject * self,const char * from_s,Py_ssize_t from_len,Py_ssize_t maxcount)2458 replace_delete_substring(PyStringObject *self,
2459                          const char *from_s, Py_ssize_t from_len,
2460                          Py_ssize_t maxcount) {
2461     char *self_s, *result_s;
2462     char *start, *next, *end;
2463     Py_ssize_t self_len, result_len;
2464     Py_ssize_t count, offset;
2465     PyStringObject *result;
2466 
2467     self_len = PyString_GET_SIZE(self);
2468     self_s = PyString_AS_STRING(self);
2469 
2470     count = stringlib_count(self_s, self_len,
2471                             from_s, from_len,
2472                             maxcount);
2473 
2474     if (count == 0) {
2475         /* no matches */
2476         return return_self(self);
2477     }
2478 
2479     result_len = self_len - (count * from_len);
2480     assert (result_len>=0);
2481 
2482     if ( (result = (PyStringObject *)
2483           PyString_FromStringAndSize(NULL, result_len)) == NULL )
2484         return NULL;
2485 
2486     result_s = PyString_AS_STRING(result);
2487 
2488     start = self_s;
2489     end = self_s + self_len;
2490     while (count-- > 0) {
2491         offset = stringlib_find(start, end-start,
2492                                 from_s, from_len,
2493                                 0);
2494         if (offset == -1)
2495             break;
2496         next = start + offset;
2497 
2498         Py_MEMCPY(result_s, start, next-start);
2499 
2500         result_s += (next-start);
2501         start = next+from_len;
2502     }
2503     Py_MEMCPY(result_s, start, end-start);
2504     return result;
2505 }
2506 
2507 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2508 Py_LOCAL(PyStringObject *)
replace_single_character_in_place(PyStringObject * self,char from_c,char to_c,Py_ssize_t maxcount)2509 replace_single_character_in_place(PyStringObject *self,
2510                                   char from_c, char to_c,
2511                                   Py_ssize_t maxcount)
2512 {
2513     char *self_s, *result_s, *start, *end, *next;
2514     Py_ssize_t self_len;
2515     PyStringObject *result;
2516 
2517     /* The result string will be the same size */
2518     self_s = PyString_AS_STRING(self);
2519     self_len = PyString_GET_SIZE(self);
2520 
2521     next = findchar(self_s, self_len, from_c);
2522 
2523     if (next == NULL) {
2524         /* No matches; return the original string */
2525         return return_self(self);
2526     }
2527 
2528     /* Need to make a new string */
2529     result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2530     if (result == NULL)
2531         return NULL;
2532     result_s = PyString_AS_STRING(result);
2533     Py_MEMCPY(result_s, self_s, self_len);
2534 
2535     /* change everything in-place, starting with this one */
2536     start =  result_s + (next-self_s);
2537     *start = to_c;
2538     start++;
2539     end = result_s + self_len;
2540 
2541     while (--maxcount > 0) {
2542         next = findchar(start, end-start, from_c);
2543         if (next == NULL)
2544             break;
2545         *next = to_c;
2546         start = next+1;
2547     }
2548 
2549     return result;
2550 }
2551 
2552 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2553 Py_LOCAL(PyStringObject *)
replace_substring_in_place(PyStringObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2554 replace_substring_in_place(PyStringObject *self,
2555                            const char *from_s, Py_ssize_t from_len,
2556                            const char *to_s, Py_ssize_t to_len,
2557                            Py_ssize_t maxcount)
2558 {
2559     char *result_s, *start, *end;
2560     char *self_s;
2561     Py_ssize_t self_len, offset;
2562     PyStringObject *result;
2563 
2564     /* The result string will be the same size */
2565 
2566     self_s = PyString_AS_STRING(self);
2567     self_len = PyString_GET_SIZE(self);
2568 
2569     offset = stringlib_find(self_s, self_len,
2570                             from_s, from_len,
2571                             0);
2572     if (offset == -1) {
2573         /* No matches; return the original string */
2574         return return_self(self);
2575     }
2576 
2577     /* Need to make a new string */
2578     result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2579     if (result == NULL)
2580         return NULL;
2581     result_s = PyString_AS_STRING(result);
2582     Py_MEMCPY(result_s, self_s, self_len);
2583 
2584     /* change everything in-place, starting with this one */
2585     start =  result_s + offset;
2586     Py_MEMCPY(start, to_s, from_len);
2587     start += from_len;
2588     end = result_s + self_len;
2589 
2590     while ( --maxcount > 0) {
2591         offset = stringlib_find(start, end-start,
2592                                 from_s, from_len,
2593                                 0);
2594         if (offset==-1)
2595             break;
2596         Py_MEMCPY(start+offset, to_s, from_len);
2597         start += offset+from_len;
2598     }
2599 
2600     return result;
2601 }
2602 
2603 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2604 Py_LOCAL(PyStringObject *)
replace_single_character(PyStringObject * self,char from_c,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2605 replace_single_character(PyStringObject *self,
2606                          char from_c,
2607                          const char *to_s, Py_ssize_t to_len,
2608                          Py_ssize_t maxcount)
2609 {
2610     char *self_s, *result_s;
2611     char *start, *next, *end;
2612     Py_ssize_t self_len, result_len;
2613     Py_ssize_t count, product;
2614     PyStringObject *result;
2615 
2616     self_s = PyString_AS_STRING(self);
2617     self_len = PyString_GET_SIZE(self);
2618 
2619     count = countchar(self_s, self_len, from_c, maxcount);
2620     if (count == 0) {
2621         /* no matches, return unchanged */
2622         return return_self(self);
2623     }
2624 
2625     /* use the difference between current and new, hence the "-1" */
2626     /*   result_len = self_len + count * (to_len-1)  */
2627     product = count * (to_len-1);
2628     if (product / (to_len-1) != count) {
2629         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2630         return NULL;
2631     }
2632     result_len = self_len + product;
2633     if (result_len < 0) {
2634         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2635         return NULL;
2636     }
2637 
2638     if ( (result = (PyStringObject *)
2639           PyString_FromStringAndSize(NULL, result_len)) == NULL)
2640         return NULL;
2641     result_s = PyString_AS_STRING(result);
2642 
2643     start = self_s;
2644     end = self_s + self_len;
2645     while (count-- > 0) {
2646         next = findchar(start, end-start, from_c);
2647         if (next == NULL)
2648             break;
2649 
2650         if (next == start) {
2651             /* replace with the 'to' */
2652             Py_MEMCPY(result_s, to_s, to_len);
2653             result_s += to_len;
2654             start += 1;
2655         } else {
2656             /* copy the unchanged old then the 'to' */
2657             Py_MEMCPY(result_s, start, next-start);
2658             result_s += (next-start);
2659             Py_MEMCPY(result_s, to_s, to_len);
2660             result_s += to_len;
2661             start = next+1;
2662         }
2663     }
2664     /* Copy the remainder of the remaining string */
2665     Py_MEMCPY(result_s, start, end-start);
2666 
2667     return result;
2668 }
2669 
2670 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2671 Py_LOCAL(PyStringObject *)
replace_substring(PyStringObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2672 replace_substring(PyStringObject *self,
2673                   const char *from_s, Py_ssize_t from_len,
2674                   const char *to_s, Py_ssize_t to_len,
2675                   Py_ssize_t maxcount) {
2676     char *self_s, *result_s;
2677     char *start, *next, *end;
2678     Py_ssize_t self_len, result_len;
2679     Py_ssize_t count, offset, product;
2680     PyStringObject *result;
2681 
2682     self_s = PyString_AS_STRING(self);
2683     self_len = PyString_GET_SIZE(self);
2684 
2685     count = stringlib_count(self_s, self_len,
2686                             from_s, from_len,
2687                             maxcount);
2688 
2689     if (count == 0) {
2690         /* no matches, return unchanged */
2691         return return_self(self);
2692     }
2693 
2694     /* Check for overflow */
2695     /*    result_len = self_len + count * (to_len-from_len) */
2696     product = count * (to_len-from_len);
2697     if (product / (to_len-from_len) != count) {
2698         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2699         return NULL;
2700     }
2701     result_len = self_len + product;
2702     if (result_len < 0) {
2703         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2704         return NULL;
2705     }
2706 
2707     if ( (result = (PyStringObject *)
2708           PyString_FromStringAndSize(NULL, result_len)) == NULL)
2709         return NULL;
2710     result_s = PyString_AS_STRING(result);
2711 
2712     start = self_s;
2713     end = self_s + self_len;
2714     while (count-- > 0) {
2715         offset = stringlib_find(start, end-start,
2716                                 from_s, from_len,
2717                                 0);
2718         if (offset == -1)
2719             break;
2720         next = start+offset;
2721         if (next == start) {
2722             /* replace with the 'to' */
2723             Py_MEMCPY(result_s, to_s, to_len);
2724             result_s += to_len;
2725             start += from_len;
2726         } else {
2727             /* copy the unchanged old then the 'to' */
2728             Py_MEMCPY(result_s, start, next-start);
2729             result_s += (next-start);
2730             Py_MEMCPY(result_s, to_s, to_len);
2731             result_s += to_len;
2732             start = next+from_len;
2733         }
2734     }
2735     /* Copy the remainder of the remaining string */
2736     Py_MEMCPY(result_s, start, end-start);
2737 
2738     return result;
2739 }
2740 
2741 
2742 Py_LOCAL(PyStringObject *)
replace(PyStringObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2743 replace(PyStringObject *self,
2744     const char *from_s, Py_ssize_t from_len,
2745     const char *to_s, Py_ssize_t to_len,
2746     Py_ssize_t maxcount)
2747 {
2748     if (maxcount < 0) {
2749         maxcount = PY_SSIZE_T_MAX;
2750     } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2751         /* nothing to do; return the original string */
2752         return return_self(self);
2753     }
2754 
2755     if (maxcount == 0 ||
2756         (from_len == 0 && to_len == 0)) {
2757         /* nothing to do; return the original string */
2758         return return_self(self);
2759     }
2760 
2761     /* Handle zero-length special cases */
2762 
2763     if (from_len == 0) {
2764         /* insert the 'to' string everywhere.   */
2765         /*    >>> "Python".replace("", ".")     */
2766         /*    '.P.y.t.h.o.n.'                   */
2767         return replace_interleave(self, to_s, to_len, maxcount);
2768     }
2769 
2770     /* Except for "".replace("", "A") == "A" there is no way beyond this */
2771     /* point for an empty self string to generate a non-empty string */
2772     /* Special case so the remaining code always gets a non-empty string */
2773     if (PyString_GET_SIZE(self) == 0) {
2774         return return_self(self);
2775     }
2776 
2777     if (to_len == 0) {
2778         /* delete all occurrences of 'from' string */
2779         if (from_len == 1) {
2780             return replace_delete_single_character(
2781                 self, from_s[0], maxcount);
2782         } else {
2783             return replace_delete_substring(self, from_s, from_len, maxcount);
2784         }
2785     }
2786 
2787     /* Handle special case where both strings have the same length */
2788 
2789     if (from_len == to_len) {
2790         if (from_len == 1) {
2791             return replace_single_character_in_place(
2792                 self,
2793                 from_s[0],
2794                 to_s[0],
2795                 maxcount);
2796         } else {
2797             return replace_substring_in_place(
2798                 self, from_s, from_len, to_s, to_len, maxcount);
2799         }
2800     }
2801 
2802     /* Otherwise use the more generic algorithms */
2803     if (from_len == 1) {
2804         return replace_single_character(self, from_s[0],
2805                                         to_s, to_len, maxcount);
2806     } else {
2807         /* len('from')>=2, len('to')>=1 */
2808         return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2809     }
2810 }
2811 
2812 PyDoc_STRVAR(replace__doc__,
2813 "S.replace(old, new[, count]) -> string\n\
2814 \n\
2815 Return a copy of string S with all occurrences of substring\n\
2816 old replaced by new.  If the optional argument count is\n\
2817 given, only the first count occurrences are replaced.");
2818 
2819 static PyObject *
string_replace(PyStringObject * self,PyObject * args)2820 string_replace(PyStringObject *self, PyObject *args)
2821 {
2822     Py_ssize_t count = -1;
2823     PyObject *from, *to;
2824     const char *from_s, *to_s;
2825     Py_ssize_t from_len, to_len;
2826 
2827     if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2828         return NULL;
2829 
2830     if (PyString_Check(from)) {
2831         from_s = PyString_AS_STRING(from);
2832         from_len = PyString_GET_SIZE(from);
2833     }
2834 #ifdef Py_USING_UNICODE
2835     if (PyUnicode_Check(from))
2836         return PyUnicode_Replace((PyObject *)self,
2837                                  from, to, count);
2838 #endif
2839     else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2840         return NULL;
2841 
2842     if (PyString_Check(to)) {
2843         to_s = PyString_AS_STRING(to);
2844         to_len = PyString_GET_SIZE(to);
2845     }
2846 #ifdef Py_USING_UNICODE
2847     else if (PyUnicode_Check(to))
2848         return PyUnicode_Replace((PyObject *)self,
2849                                  from, to, count);
2850 #endif
2851     else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2852         return NULL;
2853 
2854     return (PyObject *)replace((PyStringObject *) self,
2855                                from_s, from_len,
2856                                to_s, to_len, count);
2857 }
2858 
2859 /** End DALKE **/
2860 
2861 /* Matches the end (direction >= 0) or start (direction < 0) of self
2862  * against substr, using the start and end arguments. Returns
2863  * -1 on error, 0 if not found and 1 if found.
2864  */
2865 Py_LOCAL(int)
_string_tailmatch(PyStringObject * self,PyObject * substr,Py_ssize_t start,Py_ssize_t end,int direction)2866 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2867                   Py_ssize_t end, int direction)
2868 {
2869     Py_ssize_t len = PyString_GET_SIZE(self);
2870     Py_ssize_t slen;
2871     const char* sub;
2872     const char* str;
2873 
2874     if (PyString_Check(substr)) {
2875         sub = PyString_AS_STRING(substr);
2876         slen = PyString_GET_SIZE(substr);
2877     }
2878 #ifdef Py_USING_UNICODE
2879     else if (PyUnicode_Check(substr))
2880         return PyUnicode_Tailmatch((PyObject *)self,
2881                                    substr, start, end, direction);
2882 #endif
2883     else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2884         return -1;
2885     str = PyString_AS_STRING(self);
2886 
2887     ADJUST_INDICES(start, end, len);
2888 
2889     if (direction < 0) {
2890         /* startswith */
2891         if (start+slen > len)
2892             return 0;
2893     } else {
2894         /* endswith */
2895         if (end-start < slen || start > len)
2896             return 0;
2897 
2898         if (end-slen > start)
2899             start = end - slen;
2900     }
2901     if (end-start >= slen)
2902         return ! memcmp(str+start, sub, slen);
2903     return 0;
2904 }
2905 
2906 
2907 PyDoc_STRVAR(startswith__doc__,
2908 "S.startswith(prefix[, start[, end]]) -> bool\n\
2909 \n\
2910 Return True if S starts with the specified prefix, False otherwise.\n\
2911 With optional start, test S beginning at that position.\n\
2912 With optional end, stop comparing S at that position.\n\
2913 prefix can also be a tuple of strings to try.");
2914 
2915 static PyObject *
string_startswith(PyStringObject * self,PyObject * args)2916 string_startswith(PyStringObject *self, PyObject *args)
2917 {
2918     Py_ssize_t start = 0;
2919     Py_ssize_t end = PY_SSIZE_T_MAX;
2920     PyObject *subobj;
2921     int result;
2922 
2923     if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
2924         return NULL;
2925     if (PyTuple_Check(subobj)) {
2926         Py_ssize_t i;
2927         for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2928             result = _string_tailmatch(self,
2929                             PyTuple_GET_ITEM(subobj, i),
2930                             start, end, -1);
2931             if (result == -1)
2932                 return NULL;
2933             else if (result) {
2934                 Py_RETURN_TRUE;
2935             }
2936         }
2937         Py_RETURN_FALSE;
2938     }
2939     result = _string_tailmatch(self, subobj, start, end, -1);
2940     if (result == -1) {
2941         if (PyErr_ExceptionMatches(PyExc_TypeError))
2942             PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "
2943                          "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
2944         return NULL;
2945     }
2946     else
2947         return PyBool_FromLong(result);
2948 }
2949 
2950 
2951 PyDoc_STRVAR(endswith__doc__,
2952 "S.endswith(suffix[, start[, end]]) -> bool\n\
2953 \n\
2954 Return True if S ends with the specified suffix, False otherwise.\n\
2955 With optional start, test S beginning at that position.\n\
2956 With optional end, stop comparing S at that position.\n\
2957 suffix can also be a tuple of strings to try.");
2958 
2959 static PyObject *
string_endswith(PyStringObject * self,PyObject * args)2960 string_endswith(PyStringObject *self, PyObject *args)
2961 {
2962     Py_ssize_t start = 0;
2963     Py_ssize_t end = PY_SSIZE_T_MAX;
2964     PyObject *subobj;
2965     int result;
2966 
2967     if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
2968         return NULL;
2969     if (PyTuple_Check(subobj)) {
2970         Py_ssize_t i;
2971         for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2972             result = _string_tailmatch(self,
2973                             PyTuple_GET_ITEM(subobj, i),
2974                             start, end, +1);
2975             if (result == -1)
2976                 return NULL;
2977             else if (result) {
2978                 Py_RETURN_TRUE;
2979             }
2980         }
2981         Py_RETURN_FALSE;
2982     }
2983     result = _string_tailmatch(self, subobj, start, end, +1);
2984     if (result == -1) {
2985         if (PyErr_ExceptionMatches(PyExc_TypeError))
2986             PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "
2987                          "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
2988         return NULL;
2989     }
2990     else
2991         return PyBool_FromLong(result);
2992 }
2993 
2994 
2995 PyDoc_STRVAR(encode__doc__,
2996 "S.encode([encoding[,errors]]) -> object\n\
2997 \n\
2998 Encodes S using the codec registered for encoding. encoding defaults\n\
2999 to the default encoding. errors may be given to set a different error\n\
3000 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3001 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3002 'xmlcharrefreplace' as well as any other name registered with\n\
3003 codecs.register_error that is able to handle UnicodeEncodeErrors.");
3004 
3005 static PyObject *
string_encode(PyStringObject * self,PyObject * args,PyObject * kwargs)3006 string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
3007 {
3008     static char *kwlist[] = {"encoding", "errors", 0};
3009     char *encoding = NULL;
3010     char *errors = NULL;
3011     PyObject *v;
3012 
3013     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
3014                                      kwlist, &encoding, &errors))
3015         return NULL;
3016     v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3017     if (v == NULL)
3018         goto onError;
3019     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3020         PyErr_Format(PyExc_TypeError,
3021                      "encoder did not return a string/unicode object "
3022                      "(type=%.400s)",
3023                      Py_TYPE(v)->tp_name);
3024         Py_DECREF(v);
3025         return NULL;
3026     }
3027     return v;
3028 
3029  onError:
3030     return NULL;
3031 }
3032 
3033 
3034 PyDoc_STRVAR(decode__doc__,
3035 "S.decode([encoding[,errors]]) -> object\n\
3036 \n\
3037 Decodes S using the codec registered for encoding. encoding defaults\n\
3038 to the default encoding. errors may be given to set a different error\n\
3039 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3040 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3041 as well as any other name registered with codecs.register_error that is\n\
3042 able to handle UnicodeDecodeErrors.");
3043 
3044 static PyObject *
string_decode(PyStringObject * self,PyObject * args,PyObject * kwargs)3045 string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
3046 {
3047     static char *kwlist[] = {"encoding", "errors", 0};
3048     char *encoding = NULL;
3049     char *errors = NULL;
3050     PyObject *v;
3051 
3052     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
3053                                      kwlist, &encoding, &errors))
3054         return NULL;
3055     v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3056     if (v == NULL)
3057         goto onError;
3058     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3059         PyErr_Format(PyExc_TypeError,
3060                      "decoder did not return a string/unicode object "
3061                      "(type=%.400s)",
3062                      Py_TYPE(v)->tp_name);
3063         Py_DECREF(v);
3064         return NULL;
3065     }
3066     return v;
3067 
3068  onError:
3069     return NULL;
3070 }
3071 
3072 
3073 PyDoc_STRVAR(expandtabs__doc__,
3074 "S.expandtabs([tabsize]) -> string\n\
3075 \n\
3076 Return a copy of S where all tab characters are expanded using spaces.\n\
3077 If tabsize is not given, a tab size of 8 characters is assumed.");
3078 
3079 static PyObject*
string_expandtabs(PyStringObject * self,PyObject * args)3080 string_expandtabs(PyStringObject *self, PyObject *args)
3081 {
3082     const char *e, *p, *qe;
3083     char *q;
3084     Py_ssize_t i, j, incr;
3085     PyObject *u;
3086     int tabsize = 8;
3087 
3088     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3089         return NULL;
3090 
3091     /* First pass: determine size of output string */
3092     i = 0; /* chars up to and including most recent \n or \r */
3093     j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3094     e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3095     for (p = PyString_AS_STRING(self); p < e; p++) {
3096         if (*p == '\t') {
3097             if (tabsize > 0) {
3098                 incr = tabsize - (j % tabsize);
3099                 if (j > PY_SSIZE_T_MAX - incr)
3100                     goto overflow1;
3101                 j += incr;
3102             }
3103         }
3104         else {
3105             if (j > PY_SSIZE_T_MAX - 1)
3106                 goto overflow1;
3107             j++;
3108             if (*p == '\n' || *p == '\r') {
3109                 if (i > PY_SSIZE_T_MAX - j)
3110                     goto overflow1;
3111                 i += j;
3112                 j = 0;
3113             }
3114         }
3115     }
3116 
3117     if (i > PY_SSIZE_T_MAX - j)
3118         goto overflow1;
3119 
3120     /* Second pass: create output string and fill it */
3121     u = PyString_FromStringAndSize(NULL, i + j);
3122     if (!u)
3123         return NULL;
3124 
3125     j = 0; /* same as in first pass */
3126     q = PyString_AS_STRING(u); /* next output char */
3127     qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
3128 
3129     for (p = PyString_AS_STRING(self); p < e; p++) {
3130         if (*p == '\t') {
3131             if (tabsize > 0) {
3132                 i = tabsize - (j % tabsize);
3133                 j += i;
3134                 while (i--) {
3135                     if (q >= qe)
3136                         goto overflow2;
3137                     *q++ = ' ';
3138                 }
3139             }
3140         }
3141         else {
3142             if (q >= qe)
3143                 goto overflow2;
3144             *q++ = *p;
3145             j++;
3146             if (*p == '\n' || *p == '\r')
3147                 j = 0;
3148         }
3149     }
3150 
3151     return u;
3152 
3153   overflow2:
3154     Py_DECREF(u);
3155   overflow1:
3156     PyErr_SetString(PyExc_OverflowError, "new string is too long");
3157     return NULL;
3158 }
3159 
3160 Py_LOCAL_INLINE(PyObject *)
pad(PyStringObject * self,Py_ssize_t left,Py_ssize_t right,char fill)3161 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3162 {
3163     PyObject *u;
3164 
3165     if (left < 0)
3166         left = 0;
3167     if (right < 0)
3168         right = 0;
3169 
3170     if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3171         Py_INCREF(self);
3172         return (PyObject *)self;
3173     }
3174 
3175     u = PyString_FromStringAndSize(NULL,
3176                                    left + PyString_GET_SIZE(self) + right);
3177     if (u) {
3178         if (left)
3179             memset(PyString_AS_STRING(u), fill, left);
3180         Py_MEMCPY(PyString_AS_STRING(u) + left,
3181                PyString_AS_STRING(self),
3182                PyString_GET_SIZE(self));
3183         if (right)
3184             memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3185                fill, right);
3186     }
3187 
3188     return u;
3189 }
3190 
3191 PyDoc_STRVAR(ljust__doc__,
3192 "S.ljust(width[, fillchar]) -> string\n"
3193 "\n"
3194 "Return S left-justified in a string of length width. Padding is\n"
3195 "done using the specified fill character (default is a space).");
3196 
3197 static PyObject *
string_ljust(PyStringObject * self,PyObject * args)3198 string_ljust(PyStringObject *self, PyObject *args)
3199 {
3200     Py_ssize_t width;
3201     char fillchar = ' ';
3202 
3203     if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3204         return NULL;
3205 
3206     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3207         Py_INCREF(self);
3208         return (PyObject*) self;
3209     }
3210 
3211     return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3212 }
3213 
3214 
3215 PyDoc_STRVAR(rjust__doc__,
3216 "S.rjust(width[, fillchar]) -> string\n"
3217 "\n"
3218 "Return S right-justified in a string of length width. Padding is\n"
3219 "done using the specified fill character (default is a space)");
3220 
3221 static PyObject *
string_rjust(PyStringObject * self,PyObject * args)3222 string_rjust(PyStringObject *self, PyObject *args)
3223 {
3224     Py_ssize_t width;
3225     char fillchar = ' ';
3226 
3227     if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3228         return NULL;
3229 
3230     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3231         Py_INCREF(self);
3232         return (PyObject*) self;
3233     }
3234 
3235     return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3236 }
3237 
3238 
3239 PyDoc_STRVAR(center__doc__,
3240 "S.center(width[, fillchar]) -> string\n"
3241 "\n"
3242 "Return S centered in a string of length width. Padding is\n"
3243 "done using the specified fill character (default is a space)");
3244 
3245 static PyObject *
string_center(PyStringObject * self,PyObject * args)3246 string_center(PyStringObject *self, PyObject *args)
3247 {
3248     Py_ssize_t marg, left;
3249     Py_ssize_t width;
3250     char fillchar = ' ';
3251 
3252     if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3253         return NULL;
3254 
3255     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3256         Py_INCREF(self);
3257         return (PyObject*) self;
3258     }
3259 
3260     marg = width - PyString_GET_SIZE(self);
3261     left = marg / 2 + (marg & width & 1);
3262 
3263     return pad(self, left, marg - left, fillchar);
3264 }
3265 
3266 PyDoc_STRVAR(zfill__doc__,
3267 "S.zfill(width) -> string\n"
3268 "\n"
3269 "Pad a numeric string S with zeros on the left, to fill a field\n"
3270 "of the specified width.  The string S is never truncated.");
3271 
3272 static PyObject *
string_zfill(PyStringObject * self,PyObject * args)3273 string_zfill(PyStringObject *self, PyObject *args)
3274 {
3275     Py_ssize_t fill;
3276     PyObject *s;
3277     char *p;
3278     Py_ssize_t width;
3279 
3280     if (!PyArg_ParseTuple(args, "n:zfill", &width))
3281         return NULL;
3282 
3283     if (PyString_GET_SIZE(self) >= width) {
3284         if (PyString_CheckExact(self)) {
3285             Py_INCREF(self);
3286             return (PyObject*) self;
3287         }
3288         else
3289             return PyString_FromStringAndSize(
3290                 PyString_AS_STRING(self),
3291                 PyString_GET_SIZE(self)
3292             );
3293     }
3294 
3295     fill = width - PyString_GET_SIZE(self);
3296 
3297     s = pad(self, fill, 0, '0');
3298 
3299     if (s == NULL)
3300         return NULL;
3301 
3302     p = PyString_AS_STRING(s);
3303     if (p[fill] == '+' || p[fill] == '-') {
3304         /* move sign to beginning of string */
3305         p[0] = p[fill];
3306         p[fill] = '0';
3307     }
3308 
3309     return (PyObject*) s;
3310 }
3311 
3312 PyDoc_STRVAR(isspace__doc__,
3313 "S.isspace() -> bool\n\
3314 \n\
3315 Return True if all characters in S are whitespace\n\
3316 and there is at least one character in S, False otherwise.");
3317 
3318 static PyObject*
string_isspace(PyStringObject * self)3319 string_isspace(PyStringObject *self)
3320 {
3321     register const unsigned char *p
3322         = (unsigned char *) PyString_AS_STRING(self);
3323     register const unsigned char *e;
3324 
3325     /* Shortcut for single character strings */
3326     if (PyString_GET_SIZE(self) == 1 &&
3327         isspace(*p))
3328         return PyBool_FromLong(1);
3329 
3330     /* Special case for empty strings */
3331     if (PyString_GET_SIZE(self) == 0)
3332         return PyBool_FromLong(0);
3333 
3334     e = p + PyString_GET_SIZE(self);
3335     for (; p < e; p++) {
3336         if (!isspace(*p))
3337             return PyBool_FromLong(0);
3338     }
3339     return PyBool_FromLong(1);
3340 }
3341 
3342 
3343 PyDoc_STRVAR(isalpha__doc__,
3344 "S.isalpha() -> bool\n\
3345 \n\
3346 Return True if all characters in S are alphabetic\n\
3347 and there is at least one character in S, False otherwise.");
3348 
3349 static PyObject*
string_isalpha(PyStringObject * self)3350 string_isalpha(PyStringObject *self)
3351 {
3352     register const unsigned char *p
3353         = (unsigned char *) PyString_AS_STRING(self);
3354     register const unsigned char *e;
3355 
3356     /* Shortcut for single character strings */
3357     if (PyString_GET_SIZE(self) == 1 &&
3358         isalpha(*p))
3359         return PyBool_FromLong(1);
3360 
3361     /* Special case for empty strings */
3362     if (PyString_GET_SIZE(self) == 0)
3363         return PyBool_FromLong(0);
3364 
3365     e = p + PyString_GET_SIZE(self);
3366     for (; p < e; p++) {
3367         if (!isalpha(*p))
3368             return PyBool_FromLong(0);
3369     }
3370     return PyBool_FromLong(1);
3371 }
3372 
3373 
3374 PyDoc_STRVAR(isalnum__doc__,
3375 "S.isalnum() -> bool\n\
3376 \n\
3377 Return True if all characters in S are alphanumeric\n\
3378 and there is at least one character in S, False otherwise.");
3379 
3380 static PyObject*
string_isalnum(PyStringObject * self)3381 string_isalnum(PyStringObject *self)
3382 {
3383     register const unsigned char *p
3384         = (unsigned char *) PyString_AS_STRING(self);
3385     register const unsigned char *e;
3386 
3387     /* Shortcut for single character strings */
3388     if (PyString_GET_SIZE(self) == 1 &&
3389         isalnum(*p))
3390         return PyBool_FromLong(1);
3391 
3392     /* Special case for empty strings */
3393     if (PyString_GET_SIZE(self) == 0)
3394         return PyBool_FromLong(0);
3395 
3396     e = p + PyString_GET_SIZE(self);
3397     for (; p < e; p++) {
3398         if (!isalnum(*p))
3399             return PyBool_FromLong(0);
3400     }
3401     return PyBool_FromLong(1);
3402 }
3403 
3404 
3405 PyDoc_STRVAR(isdigit__doc__,
3406 "S.isdigit() -> bool\n\
3407 \n\
3408 Return True if all characters in S are digits\n\
3409 and there is at least one character in S, False otherwise.");
3410 
3411 static PyObject*
string_isdigit(PyStringObject * self)3412 string_isdigit(PyStringObject *self)
3413 {
3414     register const unsigned char *p
3415         = (unsigned char *) PyString_AS_STRING(self);
3416     register const unsigned char *e;
3417 
3418     /* Shortcut for single character strings */
3419     if (PyString_GET_SIZE(self) == 1 &&
3420         isdigit(*p))
3421         return PyBool_FromLong(1);
3422 
3423     /* Special case for empty strings */
3424     if (PyString_GET_SIZE(self) == 0)
3425         return PyBool_FromLong(0);
3426 
3427     e = p + PyString_GET_SIZE(self);
3428     for (; p < e; p++) {
3429         if (!isdigit(*p))
3430             return PyBool_FromLong(0);
3431     }
3432     return PyBool_FromLong(1);
3433 }
3434 
3435 
3436 PyDoc_STRVAR(islower__doc__,
3437 "S.islower() -> bool\n\
3438 \n\
3439 Return True if all cased characters in S are lowercase and there is\n\
3440 at least one cased character in S, False otherwise.");
3441 
3442 static PyObject*
string_islower(PyStringObject * self)3443 string_islower(PyStringObject *self)
3444 {
3445     register const unsigned char *p
3446         = (unsigned char *) PyString_AS_STRING(self);
3447     register const unsigned char *e;
3448     int cased;
3449 
3450     /* Shortcut for single character strings */
3451     if (PyString_GET_SIZE(self) == 1)
3452         return PyBool_FromLong(islower(*p) != 0);
3453 
3454     /* Special case for empty strings */
3455     if (PyString_GET_SIZE(self) == 0)
3456         return PyBool_FromLong(0);
3457 
3458     e = p + PyString_GET_SIZE(self);
3459     cased = 0;
3460     for (; p < e; p++) {
3461         if (isupper(*p))
3462             return PyBool_FromLong(0);
3463         else if (!cased && islower(*p))
3464             cased = 1;
3465     }
3466     return PyBool_FromLong(cased);
3467 }
3468 
3469 
3470 PyDoc_STRVAR(isupper__doc__,
3471 "S.isupper() -> bool\n\
3472 \n\
3473 Return True if all cased characters in S are uppercase and there is\n\
3474 at least one cased character in S, False otherwise.");
3475 
3476 static PyObject*
string_isupper(PyStringObject * self)3477 string_isupper(PyStringObject *self)
3478 {
3479     register const unsigned char *p
3480         = (unsigned char *) PyString_AS_STRING(self);
3481     register const unsigned char *e;
3482     int cased;
3483 
3484     /* Shortcut for single character strings */
3485     if (PyString_GET_SIZE(self) == 1)
3486         return PyBool_FromLong(isupper(*p) != 0);
3487 
3488     /* Special case for empty strings */
3489     if (PyString_GET_SIZE(self) == 0)
3490         return PyBool_FromLong(0);
3491 
3492     e = p + PyString_GET_SIZE(self);
3493     cased = 0;
3494     for (; p < e; p++) {
3495         if (islower(*p))
3496             return PyBool_FromLong(0);
3497         else if (!cased && isupper(*p))
3498             cased = 1;
3499     }
3500     return PyBool_FromLong(cased);
3501 }
3502 
3503 
3504 PyDoc_STRVAR(istitle__doc__,
3505 "S.istitle() -> bool\n\
3506 \n\
3507 Return True if S is a titlecased string and there is at least one\n\
3508 character in S, i.e. uppercase characters may only follow uncased\n\
3509 characters and lowercase characters only cased ones. Return False\n\
3510 otherwise.");
3511 
3512 static PyObject*
string_istitle(PyStringObject * self,PyObject * uncased)3513 string_istitle(PyStringObject *self, PyObject *uncased)
3514 {
3515     register const unsigned char *p
3516         = (unsigned char *) PyString_AS_STRING(self);
3517     register const unsigned char *e;
3518     int cased, previous_is_cased;
3519 
3520     /* Shortcut for single character strings */
3521     if (PyString_GET_SIZE(self) == 1)
3522         return PyBool_FromLong(isupper(*p) != 0);
3523 
3524     /* Special case for empty strings */
3525     if (PyString_GET_SIZE(self) == 0)
3526         return PyBool_FromLong(0);
3527 
3528     e = p + PyString_GET_SIZE(self);
3529     cased = 0;
3530     previous_is_cased = 0;
3531     for (; p < e; p++) {
3532         register const unsigned char ch = *p;
3533 
3534         if (isupper(ch)) {
3535             if (previous_is_cased)
3536                 return PyBool_FromLong(0);
3537             previous_is_cased = 1;
3538             cased = 1;
3539         }
3540         else if (islower(ch)) {
3541             if (!previous_is_cased)
3542                 return PyBool_FromLong(0);
3543             previous_is_cased = 1;
3544             cased = 1;
3545         }
3546         else
3547             previous_is_cased = 0;
3548     }
3549     return PyBool_FromLong(cased);
3550 }
3551 
3552 
3553 PyDoc_STRVAR(splitlines__doc__,
3554 "S.splitlines(keepends=False) -> list of strings\n\
3555 \n\
3556 Return a list of the lines in S, breaking at line boundaries.\n\
3557 Line breaks are not included in the resulting list unless keepends\n\
3558 is given and true.");
3559 
3560 static PyObject*
string_splitlines(PyStringObject * self,PyObject * args)3561 string_splitlines(PyStringObject *self, PyObject *args)
3562 {
3563     int keepends = 0;
3564 
3565     if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3566         return NULL;
3567 
3568     return stringlib_splitlines(
3569         (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3570         keepends
3571     );
3572 }
3573 
3574 PyDoc_STRVAR(sizeof__doc__,
3575 "S.__sizeof__() -> size of S in memory, in bytes");
3576 
3577 static PyObject *
string_sizeof(PyStringObject * v)3578 string_sizeof(PyStringObject *v)
3579 {
3580     Py_ssize_t res;
3581     res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3582     return PyInt_FromSsize_t(res);
3583 }
3584 
3585 static PyObject *
string_getnewargs(PyStringObject * v)3586 string_getnewargs(PyStringObject *v)
3587 {
3588     return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
3589 }
3590 
3591 
3592 #include "stringlib/string_format.h"
3593 
3594 PyDoc_STRVAR(format__doc__,
3595 "S.format(*args, **kwargs) -> string\n\
3596 \n\
3597 Return a formatted version of S, using substitutions from args and kwargs.\n\
3598 The substitutions are identified by braces ('{' and '}').");
3599 
3600 static PyObject *
string__format__(PyObject * self,PyObject * args)3601 string__format__(PyObject* self, PyObject* args)
3602 {
3603     PyObject *format_spec;
3604     PyObject *result = NULL;
3605     PyObject *tmp = NULL;
3606 
3607     /* If 2.x, convert format_spec to the same type as value */
3608     /* This is to allow things like u''.format('') */
3609     if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3610         goto done;
3611     if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
3612         PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3613                      "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3614         goto done;
3615     }
3616     tmp = PyObject_Str(format_spec);
3617     if (tmp == NULL)
3618         goto done;
3619     format_spec = tmp;
3620 
3621     result = _PyBytes_FormatAdvanced(self,
3622                                      PyString_AS_STRING(format_spec),
3623                                      PyString_GET_SIZE(format_spec));
3624 done:
3625     Py_XDECREF(tmp);
3626     return result;
3627 }
3628 
3629 PyDoc_STRVAR(p_format__doc__,
3630 "S.__format__(format_spec) -> string\n\
3631 \n\
3632 Return a formatted version of S as described by format_spec.");
3633 
3634 
3635 static PyMethodDef
3636 string_methods[] = {
3637     /* Counterparts of the obsolete stropmodule functions; except
3638        string.maketrans(). */
3639     {"join", (PyCFunction)string_join, METH_O, join__doc__},
3640     {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3641     {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3642     {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3643     {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3644     {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3645     {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3646     {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3647     {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3648     {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3649     {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3650     {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3651     {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3652      capitalize__doc__},
3653     {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3654     {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3655      endswith__doc__},
3656     {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3657     {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3658     {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3659     {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3660     {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3661     {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3662     {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3663     {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3664     {"rpartition", (PyCFunction)string_rpartition, METH_O,
3665      rpartition__doc__},
3666     {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3667      startswith__doc__},
3668     {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3669     {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3670      swapcase__doc__},
3671     {"translate", (PyCFunction)string_translate, METH_VARARGS,
3672      translate__doc__},
3673     {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3674     {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3675     {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3676     {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3677     {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3678     {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3679     {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3680     {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3681     {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3682     {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
3683     {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
3684     {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3685      expandtabs__doc__},
3686     {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3687      splitlines__doc__},
3688     {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3689      sizeof__doc__},
3690     {"__getnewargs__",          (PyCFunction)string_getnewargs, METH_NOARGS},
3691     {NULL,     NULL}                         /* sentinel */
3692 };
3693 
3694 static PyObject *
3695 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3696 
3697 static PyObject *
string_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3698 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3699 {
3700     PyObject *x = NULL;
3701     static char *kwlist[] = {"object", 0};
3702 
3703     if (type != &PyString_Type)
3704         return str_subtype_new(type, args, kwds);
3705     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3706         return NULL;
3707     if (x == NULL)
3708         return PyString_FromString("");
3709     return PyObject_Str(x);
3710 }
3711 
3712 static PyObject *
str_subtype_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3713 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3714 {
3715     PyObject *tmp, *pnew;
3716     Py_ssize_t n;
3717 
3718     assert(PyType_IsSubtype(type, &PyString_Type));
3719     tmp = string_new(&PyString_Type, args, kwds);
3720     if (tmp == NULL)
3721         return NULL;
3722     assert(PyString_Check(tmp));
3723     n = PyString_GET_SIZE(tmp);
3724     pnew = type->tp_alloc(type, n);
3725     if (pnew != NULL) {
3726         Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3727         ((PyStringObject *)pnew)->ob_shash =
3728             ((PyStringObject *)tmp)->ob_shash;
3729         ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3730     }
3731     Py_DECREF(tmp);
3732     return pnew;
3733 }
3734 
3735 static PyObject *
basestring_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3736 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3737 {
3738     PyErr_SetString(PyExc_TypeError,
3739                     "The basestring type cannot be instantiated");
3740     return NULL;
3741 }
3742 
3743 static PyObject *
string_mod(PyObject * v,PyObject * w)3744 string_mod(PyObject *v, PyObject *w)
3745 {
3746     if (!PyString_Check(v)) {
3747         Py_INCREF(Py_NotImplemented);
3748         return Py_NotImplemented;
3749     }
3750     return PyString_Format(v, w);
3751 }
3752 
/* Docstring used as tp_doc of PyBaseString_Type. */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");

/* Number-protocol table for str.  Only nb_remainder is populated, so that
   `format % args` is routed to string_mod(); every slot after it is
   omitted from the initializer and therefore zero-initialized. */
static PyNumberMethods string_as_number = {
    0,                          /* nb_add */
    0,                          /* nb_subtract */
    0,                          /* nb_multiply */
    0,                          /* nb_divide */
    string_mod,                 /* nb_remainder */
};
3763 
3764 
/* Type object for basestring.  Apart from its name, flags, docstring and
   base, the only slot it fills in is tp_new (basestring_new), which always
   raises TypeError; the type exists to be subclassed (Py_TPFLAGS_BASETYPE)
   and PyString_Type names it as tp_base. */
PyTypeObject PyBaseString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "basestring",                               /* tp_name */
    0,                                          /* tp_basicsize */
    0,                                          /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    basestring_doc,                             /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    basestring_new,                             /* tp_new */
    0,                                          /* tp_free */
};
3806 
/* Docstring used as tp_doc of PyString_Type. */
PyDoc_STRVAR(string_doc,
"str(object='') -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");

/* Type object for str.  It is a variable-size type: PyStringObject_SIZE
   bytes of fixed header (which already counts the trailing NUL) plus
   sizeof(char) per item.  It derives from PyBaseString_Type, is created
   through string_new(), and takes its methods from string_methods. */
PyTypeObject PyString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "str",                                      /* tp_name */
    PyStringObject_SIZE,                        /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    string_dealloc,                             /* tp_dealloc */
    (printfunc)string_print,                    /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    string_repr,                                /* tp_repr */
    &string_as_number,                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    string_str,                                 /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
        Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
        Py_TPFLAGS_HAVE_NEWBUFFER,              /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseString_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
3856 
3857 void
PyString_Concat(register PyObject ** pv,register PyObject * w)3858 PyString_Concat(register PyObject **pv, register PyObject *w)
3859 {
3860     register PyObject *v;
3861     if (*pv == NULL)
3862         return;
3863     if (w == NULL || !PyString_Check(*pv)) {
3864         Py_CLEAR(*pv);
3865         return;
3866     }
3867     v = string_concat((PyStringObject *) *pv, w);
3868     Py_SETREF(*pv, v);
3869 }
3870 
3871 void
PyString_ConcatAndDel(register PyObject ** pv,register PyObject * w)3872 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
3873 {
3874     PyString_Concat(pv, w);
3875     Py_XDECREF(w);
3876 }
3877 
3878 
3879 /* The following function breaks the notion that strings are immutable:
3880    it changes the size of a string.  We get away with this only if there
3881    is only one module referencing the object.  You can also think of it
3882    as creating a new string object and destroying the old one, only
3883    more efficiently.  In any case, don't use this if the string may
3884    already be known to some other part of the code...
3885    Note that if there's not enough memory to resize the string, the original
3886    string object at *pv is deallocated, *pv is set to NULL, an "out of
3887    memory" exception is set, and -1 is returned.  Else (on success) 0 is
3888    returned, and the value in *pv may or may not be the same as on input.
3889    As always, an extra byte is allocated for a trailing \0 byte (newsize
3890    does *not* include that), and a trailing \0 byte is stored.
3891 */
3892 
3893 int
_PyString_Resize(PyObject ** pv,Py_ssize_t newsize)3894 _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
3895 {
3896     register PyObject *v;
3897     register PyStringObject *sv;
3898     v = *pv;
3899     if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
3900         PyString_CHECK_INTERNED(v)) {
3901         *pv = 0;
3902         Py_DECREF(v);
3903         PyErr_BadInternalCall();
3904         return -1;
3905     }
3906     /* XXX UNREF/NEWREF interface should be more symmetrical */
3907     _Py_DEC_REFTOTAL;
3908     _Py_ForgetReference(v);
3909     *pv = (PyObject *)
3910         PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
3911     if (*pv == NULL) {
3912         PyObject_Del(v);
3913         PyErr_NoMemory();
3914         return -1;
3915     }
3916     _Py_NewReference(*pv);
3917     sv = (PyStringObject *) *pv;
3918     Py_SIZE(sv) = newsize;
3919     sv->ob_sval[newsize] = '\0';
3920     sv->ob_shash = -1;          /* invalidate cached hash value */
3921     return 0;
3922 }
3923 
3924 /* Helpers for formatstring */
3925 
3926 Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject * args,Py_ssize_t arglen,Py_ssize_t * p_argidx)3927 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3928 {
3929     Py_ssize_t argidx = *p_argidx;
3930     if (argidx < arglen) {
3931         (*p_argidx)++;
3932         if (arglen < 0)
3933             return args;
3934         else
3935             return PyTuple_GetItem(args, argidx);
3936     }
3937     PyErr_SetString(PyExc_TypeError,
3938                     "not enough arguments for format string");
3939     return NULL;
3940 }
3941 
/* Format codes
 *
 * Bit flags collected in `flags` while PyString_Format() scans the flag
 * characters that follow '%' in a conversion specifier.  Each flag
 * corresponds to one character:
 *
 * F_LJUST      '-'   (also set when a '*' width argument is negative)
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
3954 
3955 /* Returns a new reference to a PyString object, or NULL on failure. */
3956 
3957 static PyObject *
formatfloat(PyObject * v,int flags,int prec,int type)3958 formatfloat(PyObject *v, int flags, int prec, int type)
3959 {
3960     char *p;
3961     PyObject *result;
3962     double x;
3963 
3964     x = PyFloat_AsDouble(v);
3965     if (x == -1.0 && PyErr_Occurred()) {
3966         PyErr_Format(PyExc_TypeError, "float argument required, "
3967                      "not %.200s", Py_TYPE(v)->tp_name);
3968         return NULL;
3969     }
3970 
3971     if (prec < 0)
3972         prec = 6;
3973 
3974     p = PyOS_double_to_string(x, type, prec,
3975                               (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
3976 
3977     if (p == NULL)
3978         return NULL;
3979     result = PyString_FromStringAndSize(p, strlen(p));
3980     PyMem_Free(p);
3981     return result;
3982 }
3983 
3984 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3985  * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
3986  * Python's regular ints.
3987  * Return value:  a new PyString*, or NULL if error.
3988  *  .  *pbuf is set to point into it,
3989  *     *plen set to the # of chars following that.
3990  *     Caller must decref it when done using pbuf.
3991  *     The string starting at *pbuf is of the form
3992  *         "-"? ("0x" | "0X")? digit+
3993  *     "0x"/"0X" are present only for x and X conversions, with F_ALT
3994  *         set in flags.  The case of hex digits will be correct,
3995  *     There will be at least prec digits, zero-filled on the left if
3996  *         necessary to get that many.
3997  * val          object to be converted
3998  * flags        bitmask of format flags; only F_ALT is looked at
3999  * prec         minimum number of digits; 0-fill on left if needed
4000  * type         a character in [duoxX]; u acts the same as d
4001  *
4002  * CAUTION:  o, x and X conversions on regular ints can never
4003  * produce a '-' sign, but can for Python's unbounded ints.
4004  */
4005 PyObject*
_PyString_FormatLong(PyObject * val,int flags,int prec,int type,char ** pbuf,int * plen)4006 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4007                      char **pbuf, int *plen)
4008 {
4009     PyObject *result = NULL, *r1;
4010     const char *s;
4011     char *buf;
4012     Py_ssize_t i;
4013     int sign;           /* 1 if '-', else 0 */
4014     int len;            /* number of characters */
4015     Py_ssize_t llen;
4016     int numdigits;      /* len == numnondigits + skipped + numdigits */
4017     int numnondigits, skipped, filled;
4018     const char *method;
4019 
4020     switch (type) {
4021     case 'd':
4022     case 'u':
4023         method = "str";
4024         result = Py_TYPE(val)->tp_str(val);
4025         break;
4026     case 'o':
4027         method = "oct";
4028         result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4029         break;
4030     case 'x':
4031     case 'X':
4032         method = "hex";
4033         result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4034         break;
4035     default:
4036         assert(!"'type' not in [duoxX]");
4037     }
4038     if (!result)
4039         return NULL;
4040 
4041     if (PyString_AsStringAndSize(result, (char **)&s, &llen) < 0) {
4042         Py_DECREF(result);
4043         return NULL;
4044     }
4045     if (llen > INT_MAX) {
4046         PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4047         Py_DECREF(result);
4048         return NULL;
4049     }
4050     len = (int)llen;
4051     if (len > 0 && s[len-1] == 'L') {
4052         --len;
4053         if (len == 0)
4054             goto error;
4055     }
4056     sign = s[0] == '-';
4057     numnondigits = sign;
4058 
4059     /* Need to skip 0x, 0X or 0. */
4060     skipped = 0;
4061     switch (type) {
4062     case 'o':
4063         if (s[sign] != '0')
4064             goto error;
4065         /* If 0 is only digit, leave it alone. */
4066         if ((flags & F_ALT) == 0 && len - sign > 1)
4067             skipped = 1;
4068         break;
4069     case 'x':
4070     case 'X':
4071         if (s[sign] != '0' || (s[sign + 1] != 'x' && s[sign + 1] != 'X'))
4072             goto error;
4073         if ((flags & F_ALT) == 0)
4074             skipped = 2;
4075         else
4076             numnondigits += 2;
4077         break;
4078     }
4079     numdigits = len - numnondigits - skipped;
4080     if (numdigits <= 0)
4081         goto error;
4082 
4083     filled = prec - numdigits;
4084     if (filled < 0)
4085         filled = 0;
4086     len = numnondigits + filled + numdigits;
4087 
4088     /* To modify the string in-place, there can only be one reference. */
4089     if (skipped >= filled &&
4090         PyString_CheckExact(result) &&
4091         Py_REFCNT(result) == 1 &&
4092         !PyString_CHECK_INTERNED(result))
4093     {
4094         r1 = NULL;
4095         buf = (char *)s + skipped - filled;
4096     }
4097     else {
4098         r1 = result;
4099         result = PyString_FromStringAndSize(NULL, len);
4100         if (!result) {
4101             Py_DECREF(r1);
4102             return NULL;
4103         }
4104         buf = PyString_AS_STRING(result);
4105     }
4106 
4107     for (i = numnondigits; --i >= 0;)
4108         buf[i] = s[i];
4109     buf += numnondigits;
4110     s += numnondigits + skipped;
4111     for (i = 0; i < filled; i++)
4112         *buf++ = '0';
4113     if (r1 == NULL) {
4114         assert(buf == s);
4115         buf += numdigits;
4116     }
4117     else {
4118         for (i = 0; i < numdigits; i++)
4119             *buf++ = *s++;
4120     }
4121     *buf = '\0';
4122     buf -= len;
4123     Py_XDECREF(r1);
4124 
4125     /* Fix up case for hex conversions. */
4126     if (type == 'X') {
4127         /* Need to convert all lower case letters to upper case.
4128            and need to convert 0x to 0X (and -0x to -0X). */
4129         for (i = 0; i < len; i++) {
4130             if (buf[i] >= 'a' && buf[i] <= 'z')
4131                 buf[i] -= 'a'-'A';
4132         }
4133     }
4134     *pbuf = buf;
4135     *plen = len;
4136     return result;
4137 
4138 error:
4139     PyErr_Format(PyExc_ValueError,
4140                  "%%%c format: invalid result of __%s__ (type=%.200s)",
4141                  type, method, Py_TYPE(val)->tp_name);
4142     Py_DECREF(result);
4143     return NULL;
4144 }
4145 
4146 Py_LOCAL_INLINE(int)
formatint(char * buf,size_t buflen,int flags,int prec,int type,PyObject * v)4147 formatint(char *buf, size_t buflen, int flags,
4148           int prec, int type, PyObject *v)
4149 {
4150     /* fmt = '%#.' + `prec` + 'l' + `type`
4151        worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4152        + 1 + 1 = 24 */
4153     char fmt[64];       /* plenty big enough! */
4154     char *sign;
4155     long x;
4156 
4157     x = PyInt_AsLong(v);
4158     if (x == -1 && PyErr_Occurred()) {
4159         PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4160                      Py_TYPE(v)->tp_name);
4161         return -1;
4162     }
4163     if (x < 0 && type == 'u') {
4164         type = 'd';
4165     }
4166     if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4167         sign = "-";
4168     else
4169         sign = "";
4170     if (prec < 0)
4171         prec = 1;
4172 
4173     if ((flags & F_ALT) &&
4174         (type == 'x' || type == 'X')) {
4175         /* When converting under %#x or %#X, there are a number
4176          * of issues that cause pain:
4177          * - when 0 is being converted, the C standard leaves off
4178          *   the '0x' or '0X', which is inconsistent with other
4179          *   %#x/%#X conversions and inconsistent with Python's
4180          *   hex() function
4181          * - there are platforms that violate the standard and
4182          *   convert 0 with the '0x' or '0X'
4183          *   (Metrowerks, Compaq Tru64)
4184          * - there are platforms that give '0x' when converting
4185          *   under %#X, but convert 0 in accordance with the
4186          *   standard (OS/2 EMX)
4187          *
4188          * We can achieve the desired consistency by inserting our
4189          * own '0x' or '0X' prefix, and substituting %x/%X in place
4190          * of %#x/%#X.
4191          *
4192          * Note that this is the same approach as used in
4193          * formatint() in unicodeobject.c
4194          */
4195         PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4196                       sign, type, prec, type);
4197     }
4198     else {
4199         PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4200                       sign, (flags&F_ALT) ? "#" : "",
4201                       prec, type);
4202     }
4203 
4204     /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4205      * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4206      */
4207     if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4208         PyErr_SetString(PyExc_OverflowError,
4209             "formatted integer is too long (precision too large?)");
4210         return -1;
4211     }
4212     if (sign[0])
4213         PyOS_snprintf(buf, buflen, fmt, -x);
4214     else
4215         PyOS_snprintf(buf, buflen, fmt, x);
4216     return (int)strlen(buf);
4217 }
4218 
4219 Py_LOCAL_INLINE(int)
formatchar(char * buf,size_t buflen,PyObject * v)4220 formatchar(char *buf, size_t buflen, PyObject *v)
4221 {
4222     /* presume that the buffer is at least 2 characters long */
4223     if (PyString_Check(v)) {
4224         if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4225             return -1;
4226     }
4227     else {
4228         if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4229             return -1;
4230     }
4231     buf[1] = '\0';
4232     return 1;
4233 }
4234 
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the ints &
   chars are formatted. XXX This is a magic number. Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format. For now, the current solution is sufficient.

   The replacement list is fully parenthesized so that the macro behaves
   as a single operand wherever it is expanded (e.g. after `sizeof`).
*/
#define FORMATBUFLEN ((size_t)120)
4244 
4245 PyObject *
PyString_Format(PyObject * format,PyObject * args)4246 PyString_Format(PyObject *format, PyObject *args)
4247 {
4248     char *fmt, *res;
4249     Py_ssize_t arglen, argidx;
4250     Py_ssize_t reslen, rescnt, fmtcnt;
4251     int args_owned = 0;
4252     PyObject *result, *orig_args;
4253 #ifdef Py_USING_UNICODE
4254     PyObject *v, *w;
4255 #endif
4256     PyObject *dict = NULL;
4257     if (format == NULL || !PyString_Check(format) || args == NULL) {
4258         PyErr_BadInternalCall();
4259         return NULL;
4260     }
4261     orig_args = args;
4262     fmt = PyString_AS_STRING(format);
4263     fmtcnt = PyString_GET_SIZE(format);
4264     reslen = rescnt = fmtcnt + 100;
4265     result = PyString_FromStringAndSize((char *)NULL, reslen);
4266     if (result == NULL)
4267         return NULL;
4268     res = PyString_AsString(result);
4269     if (PyTuple_Check(args)) {
4270         arglen = PyTuple_GET_SIZE(args);
4271         argidx = 0;
4272     }
4273     else {
4274         arglen = -1;
4275         argidx = -2;
4276     }
4277     if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
4278         !PyTuple_Check(args) && !PyObject_TypeCheck(args, &PyBaseString_Type))
4279         dict = args;
4280     while (--fmtcnt >= 0) {
4281         if (*fmt != '%') {
4282             if (--rescnt < 0) {
4283                 rescnt = fmtcnt + 100;
4284                 reslen += rescnt;
4285                 if (_PyString_Resize(&result, reslen))
4286                     return NULL;
4287                 res = PyString_AS_STRING(result)
4288                     + reslen - rescnt;
4289                 --rescnt;
4290             }
4291             *res++ = *fmt++;
4292         }
4293         else {
4294             /* Got a format specifier */
4295             int flags = 0;
4296             Py_ssize_t width = -1;
4297             int prec = -1;
4298             int c = '\0';
4299             int fill;
4300             int isnumok;
4301             PyObject *v = NULL;
4302             PyObject *temp = NULL;
4303             char *pbuf;
4304             int sign;
4305             Py_ssize_t len;
4306             char formatbuf[FORMATBUFLEN];
4307                  /* For format{int,char}() */
4308 #ifdef Py_USING_UNICODE
4309             char *fmt_start = fmt;
4310             Py_ssize_t argidx_start = argidx;
4311 #endif
4312 
4313             fmt++;
4314             if (*fmt == '(') {
4315                 char *keystart;
4316                 Py_ssize_t keylen;
4317                 PyObject *key;
4318                 int pcount = 1;
4319 
4320                 if (dict == NULL) {
4321                     PyErr_SetString(PyExc_TypeError,
4322                              "format requires a mapping");
4323                     goto error;
4324                 }
4325                 ++fmt;
4326                 --fmtcnt;
4327                 keystart = fmt;
4328                 /* Skip over balanced parentheses */
4329                 while (pcount > 0 && --fmtcnt >= 0) {
4330                     if (*fmt == ')')
4331                         --pcount;
4332                     else if (*fmt == '(')
4333                         ++pcount;
4334                     fmt++;
4335                 }
4336                 keylen = fmt - keystart - 1;
4337                 if (fmtcnt < 0 || pcount > 0) {
4338                     PyErr_SetString(PyExc_ValueError,
4339                                "incomplete format key");
4340                     goto error;
4341                 }
4342                 key = PyString_FromStringAndSize(keystart,
4343                                                  keylen);
4344                 if (key == NULL)
4345                     goto error;
4346                 if (args_owned) {
4347                     Py_DECREF(args);
4348                     args_owned = 0;
4349                 }
4350                 args = PyObject_GetItem(dict, key);
4351                 Py_DECREF(key);
4352                 if (args == NULL) {
4353                     goto error;
4354                 }
4355                 args_owned = 1;
4356                 arglen = -1;
4357                 argidx = -2;
4358             }
4359             while (--fmtcnt >= 0) {
4360                 switch (c = *fmt++) {
4361                 case '-': flags |= F_LJUST; continue;
4362                 case '+': flags |= F_SIGN; continue;
4363                 case ' ': flags |= F_BLANK; continue;
4364                 case '#': flags |= F_ALT; continue;
4365                 case '0': flags |= F_ZERO; continue;
4366                 }
4367                 break;
4368             }
4369             if (c == '*') {
4370                 v = getnextarg(args, arglen, &argidx);
4371                 if (v == NULL)
4372                     goto error;
4373                 if (!PyInt_Check(v)) {
4374                     PyErr_SetString(PyExc_TypeError,
4375                                     "* wants int");
4376                     goto error;
4377                 }
4378                 width = PyInt_AsSsize_t(v);
4379                 if (width == -1 && PyErr_Occurred())
4380                     goto error;
4381                 if (width < 0) {
4382                     flags |= F_LJUST;
4383                     width = -width;
4384                 }
4385                 if (--fmtcnt >= 0)
4386                     c = *fmt++;
4387             }
4388             else if (c >= 0 && isdigit(c)) {
4389                 width = c - '0';
4390                 while (--fmtcnt >= 0) {
4391                     c = Py_CHARMASK(*fmt++);
4392                     if (!isdigit(c))
4393                         break;
4394                     if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
4395                         PyErr_SetString(
4396                             PyExc_ValueError,
4397                             "width too big");
4398                         goto error;
4399                     }
4400                     width = width*10 + (c - '0');
4401                 }
4402             }
4403             if (c == '.') {
4404                 prec = 0;
4405                 if (--fmtcnt >= 0)
4406                     c = *fmt++;
4407                 if (c == '*') {
4408                     v = getnextarg(args, arglen, &argidx);
4409                     if (v == NULL)
4410                         goto error;
4411                     if (!PyInt_Check(v)) {
4412                         PyErr_SetString(
4413                             PyExc_TypeError,
4414                             "* wants int");
4415                         goto error;
4416                     }
4417                     prec = _PyInt_AsInt(v);
4418                     if (prec == -1 && PyErr_Occurred())
4419                         goto error;
4420                     if (prec < 0)
4421                         prec = 0;
4422                     if (--fmtcnt >= 0)
4423                         c = *fmt++;
4424                 }
4425                 else if (c >= 0 && isdigit(c)) {
4426                     prec = c - '0';
4427                     while (--fmtcnt >= 0) {
4428                         c = Py_CHARMASK(*fmt++);
4429                         if (!isdigit(c))
4430                             break;
4431                         if (prec > (INT_MAX - ((int)c - '0')) / 10) {
4432                             PyErr_SetString(
4433                                 PyExc_ValueError,
4434                                 "prec too big");
4435                             goto error;
4436                         }
4437                         prec = prec*10 + (c - '0');
4438                     }
4439                 }
4440             } /* prec */
4441             if (fmtcnt >= 0) {
4442                 if (c == 'h' || c == 'l' || c == 'L') {
4443                     if (--fmtcnt >= 0)
4444                         c = *fmt++;
4445                 }
4446             }
4447             if (fmtcnt < 0) {
4448                 PyErr_SetString(PyExc_ValueError,
4449                                 "incomplete format");
4450                 goto error;
4451             }
4452             if (c != '%') {
4453                 v = getnextarg(args, arglen, &argidx);
4454                 if (v == NULL)
4455                     goto error;
4456             }
4457             sign = 0;
4458             fill = ' ';
4459             switch (c) {
4460             case '%':
4461                 pbuf = "%";
4462                 len = 1;
4463                 break;
4464             case 's':
4465 #ifdef Py_USING_UNICODE
4466                 if (PyUnicode_Check(v)) {
4467                     fmt = fmt_start;
4468                     argidx = argidx_start;
4469                     goto unicode;
4470                 }
4471 #endif
4472                 temp = _PyObject_Str(v);
4473 #ifdef Py_USING_UNICODE
4474                 if (temp != NULL && PyUnicode_Check(temp)) {
4475                     Py_DECREF(temp);
4476                     fmt = fmt_start;
4477                     argidx = argidx_start;
4478                     goto unicode;
4479                 }
4480 #endif
4481                 /* Fall through */
4482             case 'r':
4483                 if (c == 'r')
4484                     temp = PyObject_Repr(v);
4485                 if (temp == NULL)
4486                     goto error;
4487                 if (!PyString_Check(temp)) {
4488                     PyErr_SetString(PyExc_TypeError,
4489                       "%s argument has non-string str()");
4490                     Py_DECREF(temp);
4491                     goto error;
4492                 }
4493                 pbuf = PyString_AS_STRING(temp);
4494                 len = PyString_GET_SIZE(temp);
4495                 if (prec >= 0 && len > prec)
4496                     len = prec;
4497                 break;
4498             case 'i':
4499             case 'd':
4500             case 'u':
4501             case 'o':
4502             case 'x':
4503             case 'X':
4504                 if (c == 'i')
4505                     c = 'd';
4506                 isnumok = 0;
4507                 if (PyNumber_Check(v)) {
4508                     PyObject *iobj=NULL;
4509 
4510                     if (PyInt_Check(v) || (PyLong_Check(v))) {
4511                         iobj = v;
4512                         Py_INCREF(iobj);
4513                     }
4514                     else {
4515                         iobj = PyNumber_Int(v);
4516                         if (iobj==NULL) {
4517                             PyErr_Clear();
4518                             iobj = PyNumber_Long(v);
4519                         }
4520                     }
4521                     if (iobj!=NULL) {
4522                         if (PyInt_Check(iobj)) {
4523                             isnumok = 1;
4524                             pbuf = formatbuf;
4525                             len = formatint(pbuf,
4526                                             sizeof(formatbuf),
4527                                             flags, prec, c, iobj);
4528                             Py_DECREF(iobj);
4529                             if (len < 0)
4530                                 goto error;
4531                             sign = 1;
4532                         }
4533                         else if (PyLong_Check(iobj)) {
4534                             int ilen;
4535 
4536                             isnumok = 1;
4537                             temp = _PyString_FormatLong(iobj, flags,
4538                                 prec, c, &pbuf, &ilen);
4539                             Py_DECREF(iobj);
4540                             len = ilen;
4541                             if (!temp)
4542                                 goto error;
4543                             sign = 1;
4544                         }
4545                         else {
4546                             Py_DECREF(iobj);
4547                         }
4548                     }
4549                 }
4550                 if (!isnumok) {
4551                     PyErr_Format(PyExc_TypeError,
4552                         "%%%c format: a number is required, "
4553                         "not %.200s", c, Py_TYPE(v)->tp_name);
4554                     goto error;
4555                 }
4556                 if (flags & F_ZERO)
4557                     fill = '0';
4558                 break;
4559             case 'e':
4560             case 'E':
4561             case 'f':
4562             case 'F':
4563             case 'g':
4564             case 'G':
4565                 temp = formatfloat(v, flags, prec, c);
4566                 if (temp == NULL)
4567                     goto error;
4568                 pbuf = PyString_AS_STRING(temp);
4569                 len = PyString_GET_SIZE(temp);
4570                 sign = 1;
4571                 if (flags & F_ZERO)
4572                     fill = '0';
4573                 break;
4574             case 'c':
4575 #ifdef Py_USING_UNICODE
4576                 if (PyUnicode_Check(v)) {
4577                     fmt = fmt_start;
4578                     argidx = argidx_start;
4579                     goto unicode;
4580                 }
4581 #endif
4582                 pbuf = formatbuf;
4583                 len = formatchar(pbuf, sizeof(formatbuf), v);
4584                 if (len < 0)
4585                     goto error;
4586                 break;
4587             default:
4588                 PyErr_Format(PyExc_ValueError,
4589                   "unsupported format character '%c' (0x%x) "
4590                   "at index %zd",
4591                   c, c,
4592                   (Py_ssize_t)(fmt - 1 -
4593                                PyString_AsString(format)));
4594                 goto error;
4595             }
4596             if (sign) {
4597                 if (*pbuf == '-' || *pbuf == '+') {
4598                     sign = *pbuf++;
4599                     len--;
4600                 }
4601                 else if (flags & F_SIGN)
4602                     sign = '+';
4603                 else if (flags & F_BLANK)
4604                     sign = ' ';
4605                 else
4606                     sign = 0;
4607             }
4608             if (width < len)
4609                 width = len;
4610             if (rescnt - (sign != 0) < width) {
4611                 reslen -= rescnt;
4612                 rescnt = width + fmtcnt + 100;
4613                 reslen += rescnt;
4614                 if (reslen < 0) {
4615                     Py_DECREF(result);
4616                     Py_XDECREF(temp);
4617                     return PyErr_NoMemory();
4618                 }
4619                 if (_PyString_Resize(&result, reslen)) {
4620                     Py_XDECREF(temp);
4621                     return NULL;
4622                 }
4623                 res = PyString_AS_STRING(result)
4624                     + reslen - rescnt;
4625             }
4626             if (sign) {
4627                 if (fill != ' ')
4628                     *res++ = sign;
4629                 rescnt--;
4630                 if (width > len)
4631                     width--;
4632             }
4633             if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4634                 assert(pbuf[0] == '0');
4635                 assert(pbuf[1] == c);
4636                 if (fill != ' ') {
4637                     *res++ = *pbuf++;
4638                     *res++ = *pbuf++;
4639                 }
4640                 rescnt -= 2;
4641                 width -= 2;
4642                 if (width < 0)
4643                     width = 0;
4644                 len -= 2;
4645             }
4646             if (width > len && !(flags & F_LJUST)) {
4647                 do {
4648                     --rescnt;
4649                     *res++ = fill;
4650                 } while (--width > len);
4651             }
4652             if (fill == ' ') {
4653                 if (sign)
4654                     *res++ = sign;
4655                 if ((flags & F_ALT) &&
4656                     (c == 'x' || c == 'X')) {
4657                     assert(pbuf[0] == '0');
4658                     assert(pbuf[1] == c);
4659                     *res++ = *pbuf++;
4660                     *res++ = *pbuf++;
4661                 }
4662             }
4663             Py_MEMCPY(res, pbuf, len);
4664             res += len;
4665             rescnt -= len;
4666             while (--width >= len) {
4667                 --rescnt;
4668                 *res++ = ' ';
4669             }
4670             if (dict && (argidx < arglen) && c != '%') {
4671                 PyErr_SetString(PyExc_TypeError,
4672                            "not all arguments converted during string formatting");
4673                 Py_XDECREF(temp);
4674                 goto error;
4675             }
4676             Py_XDECREF(temp);
4677         } /* '%' */
4678     } /* until end */
4679     if (argidx < arglen && !dict) {
4680         PyErr_SetString(PyExc_TypeError,
4681                         "not all arguments converted during string formatting");
4682         goto error;
4683     }
4684     if (args_owned) {
4685         Py_DECREF(args);
4686     }
4687     if (_PyString_Resize(&result, reslen - rescnt))
4688         return NULL;
4689     return result;
4690 
4691 #ifdef Py_USING_UNICODE
4692  unicode:
4693     if (args_owned) {
4694         Py_DECREF(args);
4695         args_owned = 0;
4696     }
4697     /* Fiddle args right (remove the first argidx arguments) */
4698     if (PyTuple_Check(orig_args) && argidx > 0) {
4699         PyObject *v;
4700         Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4701         v = PyTuple_New(n);
4702         if (v == NULL)
4703             goto error;
4704         while (--n >= 0) {
4705             PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4706             Py_INCREF(w);
4707             PyTuple_SET_ITEM(v, n, w);
4708         }
4709         args = v;
4710     } else {
4711         Py_INCREF(orig_args);
4712         args = orig_args;
4713     }
4714     args_owned = 1;
4715     /* Take what we have of the result and let the Unicode formatting
4716        function format the rest of the input. */
4717     rescnt = res - PyString_AS_STRING(result);
4718     if (_PyString_Resize(&result, rescnt))
4719         goto error;
4720     fmtcnt = PyString_GET_SIZE(format) - \
4721              (fmt - PyString_AS_STRING(format));
4722     format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4723     if (format == NULL)
4724         goto error;
4725     v = PyUnicode_Format(format, args);
4726     Py_DECREF(format);
4727     if (v == NULL)
4728         goto error;
4729     /* Paste what we have (result) to what the Unicode formatting
4730        function returned (v) and return the result (or error) */
4731     w = PyUnicode_Concat(result, v);
4732     Py_DECREF(result);
4733     Py_DECREF(v);
4734     Py_DECREF(args);
4735     return w;
4736 #endif /* Py_USING_UNICODE */
4737 
4738  error:
4739     Py_DECREF(result);
4740     if (args_owned) {
4741         Py_DECREF(args);
4742     }
4743     return NULL;
4744 }
4745 
4746 void
PyString_InternInPlace(PyObject ** p)4747 PyString_InternInPlace(PyObject **p)
4748 {
4749     register PyStringObject *s = (PyStringObject *)(*p);
4750     PyObject *t;
4751     if (s == NULL || !PyString_Check(s))
4752         Py_FatalError("PyString_InternInPlace: strings only please!");
4753     /* If it's a string subclass, we don't really know what putting
4754        it in the interned dict might do. */
4755     if (!PyString_CheckExact(s))
4756         return;
4757     if (PyString_CHECK_INTERNED(s))
4758         return;
4759     if (interned == NULL) {
4760         interned = PyDict_New();
4761         if (interned == NULL) {
4762             PyErr_Clear(); /* Don't leave an exception */
4763             return;
4764         }
4765     }
4766     t = PyDict_GetItem(interned, (PyObject *)s);
4767     if (t) {
4768         Py_INCREF(t);
4769         Py_SETREF(*p, t);
4770         return;
4771     }
4772 
4773     if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4774         PyErr_Clear();
4775         return;
4776     }
4777     /* The two references in interned are not counted by refcnt.
4778        The string deallocator will take care of this */
4779     Py_REFCNT(s) -= 2;
4780     PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
4781 }
4782 
4783 void
PyString_InternImmortal(PyObject ** p)4784 PyString_InternImmortal(PyObject **p)
4785 {
4786     PyString_InternInPlace(p);
4787     if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4788         PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4789         Py_INCREF(*p);
4790     }
4791 }
4792 
4793 
4794 PyObject *
PyString_InternFromString(const char * cp)4795 PyString_InternFromString(const char *cp)
4796 {
4797     PyObject *s = PyString_FromString(cp);
4798     if (s == NULL)
4799         return NULL;
4800     PyString_InternInPlace(&s);
4801     return s;
4802 }
4803 
4804 void
PyString_Fini(void)4805 PyString_Fini(void)
4806 {
4807     int i;
4808     for (i = 0; i < UCHAR_MAX + 1; i++)
4809         Py_CLEAR(characters[i]);
4810     Py_CLEAR(nullstring);
4811 }
4812 
_Py_ReleaseInternedStrings(void)4813 void _Py_ReleaseInternedStrings(void)
4814 {
4815     PyObject *keys;
4816     PyStringObject *s;
4817     Py_ssize_t i, n;
4818     Py_ssize_t immortal_size = 0, mortal_size = 0;
4819 
4820     if (interned == NULL || !PyDict_Check(interned))
4821         return;
4822     keys = PyDict_Keys(interned);
4823     if (keys == NULL || !PyList_Check(keys)) {
4824         PyErr_Clear();
4825         return;
4826     }
4827 
4828     /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4829        detector, interned strings are not forcibly deallocated; rather, we
4830        give them their stolen references back, and then clear and DECREF
4831        the interned dict. */
4832 
4833     n = PyList_GET_SIZE(keys);
4834     fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4835         n);
4836     for (i = 0; i < n; i++) {
4837         s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4838         switch (s->ob_sstate) {
4839         case SSTATE_NOT_INTERNED:
4840             /* XXX Shouldn't happen */
4841             break;
4842         case SSTATE_INTERNED_IMMORTAL:
4843             Py_REFCNT(s) += 1;
4844             immortal_size += Py_SIZE(s);
4845             break;
4846         case SSTATE_INTERNED_MORTAL:
4847             Py_REFCNT(s) += 2;
4848             mortal_size += Py_SIZE(s);
4849             break;
4850         default:
4851             Py_FatalError("Inconsistent interned string state.");
4852         }
4853         s->ob_sstate = SSTATE_NOT_INTERNED;
4854     }
4855     fprintf(stderr, "total size of all interned strings: "
4856                     "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4857                     "mortal/immortal\n", mortal_size, immortal_size);
4858     Py_DECREF(keys);
4859     PyDict_Clear(interned);
4860     Py_CLEAR(interned);
4861 }
4862