• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* bytes object implementation */
2 
3 #define PY_SSIZE_T_CLEAN
4 
5 #include "Python.h"
6 
7 #include "bytes_methods.h"
8 #include "pystrhex.h"
9 #include <stddef.h>
10 
11 /*[clinic input]
12 class bytes "PyBytesObject *" "&PyBytes_Type"
13 [clinic start generated code]*/
14 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
15 
16 #include "clinic/bytesobject.c.h"
17 
18 #ifdef COUNT_ALLOCS
19 Py_ssize_t null_strings, one_strings;
20 #endif
21 
22 static PyBytesObject *characters[UCHAR_MAX + 1];
23 static PyBytesObject *nullstring;
24 
25 /* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
26    for a string of length n should request PyBytesObject_SIZE + n bytes.
27 
28    Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
29    3 bytes per string allocation on a typical system.
30 */
31 #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
32 
33 /* Forward declaration */
34 Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
35                                                    char *str);
36 
37 /*
38    For PyBytes_FromString(), the parameter `str' points to a null-terminated
39    string containing exactly `size' bytes.
40 
41    For PyBytes_FromStringAndSize(), the parameter `str' is
42    either NULL or else points to a string containing at least `size' bytes.
43    For PyBytes_FromStringAndSize(), the string in the `str' parameter does
44    not have to be null-terminated.  (Therefore it is safe to construct a
45    substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
46    If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
47    bytes (setting the last byte to the null terminating character) and you can
48    fill in the data yourself.  If `str' is non-NULL then the resulting
49    PyBytes object must be treated as immutable and you must not fill in nor
50    alter the data yourself, since the strings may be shared.
51 
52    The PyObject member `op->ob_size', which denotes the number of "extra
53    items" in a variable-size object, will contain the number of bytes
54    allocated for string data, not counting the null terminating character.
55    It is therefore equal to the `size' parameter (for
56    PyBytes_FromStringAndSize()) or the length of the string in the `str'
57    parameter (for PyBytes_FromString()).
58 */
59 static PyObject *
_PyBytes_FromSize(Py_ssize_t size,int use_calloc)60 _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
61 {
62     PyBytesObject *op;
63     assert(size >= 0);
64 
65     if (size == 0 && (op = nullstring) != NULL) {
66 #ifdef COUNT_ALLOCS
67         null_strings++;
68 #endif
69         Py_INCREF(op);
70         return (PyObject *)op;
71     }
72 
73     if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
74         PyErr_SetString(PyExc_OverflowError,
75                         "byte string is too large");
76         return NULL;
77     }
78 
79     /* Inline PyObject_NewVar */
80     if (use_calloc)
81         op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
82     else
83         op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
84     if (op == NULL)
85         return PyErr_NoMemory();
86     (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
87     op->ob_shash = -1;
88     if (!use_calloc)
89         op->ob_sval[size] = '\0';
90     /* empty byte string singleton */
91     if (size == 0) {
92         nullstring = op;
93         Py_INCREF(op);
94     }
95     return (PyObject *) op;
96 }
97 
98 PyObject *
PyBytes_FromStringAndSize(const char * str,Py_ssize_t size)99 PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
100 {
101     PyBytesObject *op;
102     if (size < 0) {
103         PyErr_SetString(PyExc_SystemError,
104             "Negative size passed to PyBytes_FromStringAndSize");
105         return NULL;
106     }
107     if (size == 1 && str != NULL &&
108         (op = characters[*str & UCHAR_MAX]) != NULL)
109     {
110 #ifdef COUNT_ALLOCS
111         one_strings++;
112 #endif
113         Py_INCREF(op);
114         return (PyObject *)op;
115     }
116 
117     op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
118     if (op == NULL)
119         return NULL;
120     if (str == NULL)
121         return (PyObject *) op;
122 
123     memcpy(op->ob_sval, str, size);
124     /* share short strings */
125     if (size == 1) {
126         characters[*str & UCHAR_MAX] = op;
127         Py_INCREF(op);
128     }
129     return (PyObject *) op;
130 }
131 
132 PyObject *
PyBytes_FromString(const char * str)133 PyBytes_FromString(const char *str)
134 {
135     size_t size;
136     PyBytesObject *op;
137 
138     assert(str != NULL);
139     size = strlen(str);
140     if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
141         PyErr_SetString(PyExc_OverflowError,
142             "byte string is too long");
143         return NULL;
144     }
145     if (size == 0 && (op = nullstring) != NULL) {
146 #ifdef COUNT_ALLOCS
147         null_strings++;
148 #endif
149         Py_INCREF(op);
150         return (PyObject *)op;
151     }
152     if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
153 #ifdef COUNT_ALLOCS
154         one_strings++;
155 #endif
156         Py_INCREF(op);
157         return (PyObject *)op;
158     }
159 
160     /* Inline PyObject_NewVar */
161     op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
162     if (op == NULL)
163         return PyErr_NoMemory();
164     (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
165     op->ob_shash = -1;
166     memcpy(op->ob_sval, str, size+1);
167     /* share short strings */
168     if (size == 0) {
169         nullstring = op;
170         Py_INCREF(op);
171     } else if (size == 1) {
172         characters[*str & UCHAR_MAX] = op;
173         Py_INCREF(op);
174     }
175     return (PyObject *) op;
176 }
177 
178 PyObject *
PyBytes_FromFormatV(const char * format,va_list vargs)179 PyBytes_FromFormatV(const char *format, va_list vargs)
180 {
181     char *s;
182     const char *f;
183     const char *p;
184     Py_ssize_t prec;
185     int longflag;
186     int size_tflag;
187     /* Longest 64-bit formatted numbers:
188        - "18446744073709551615\0" (21 bytes)
189        - "-9223372036854775808\0" (21 bytes)
190        Decimal takes the most space (it isn't enough for octal.)
191 
192        Longest 64-bit pointer representation:
193        "0xffffffffffffffff\0" (19 bytes). */
194     char buffer[21];
195     _PyBytesWriter writer;
196 
197     _PyBytesWriter_Init(&writer);
198 
199     s = _PyBytesWriter_Alloc(&writer, strlen(format));
200     if (s == NULL)
201         return NULL;
202     writer.overallocate = 1;
203 
204 #define WRITE_BYTES(str) \
205     do { \
206         s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
207         if (s == NULL) \
208             goto error; \
209     } while (0)
210 
211     for (f = format; *f; f++) {
212         if (*f != '%') {
213             *s++ = *f;
214             continue;
215         }
216 
217         p = f++;
218 
219         /* ignore the width (ex: 10 in "%10s") */
220         while (Py_ISDIGIT(*f))
221             f++;
222 
223         /* parse the precision (ex: 10 in "%.10s") */
224         prec = 0;
225         if (*f == '.') {
226             f++;
227             for (; Py_ISDIGIT(*f); f++) {
228                 prec = (prec * 10) + (*f - '0');
229             }
230         }
231 
232         while (*f && *f != '%' && !Py_ISALPHA(*f))
233             f++;
234 
235         /* handle the long flag ('l'), but only for %ld and %lu.
236            others can be added when necessary. */
237         longflag = 0;
238         if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
239             longflag = 1;
240             ++f;
241         }
242 
243         /* handle the size_t flag ('z'). */
244         size_tflag = 0;
245         if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
246             size_tflag = 1;
247             ++f;
248         }
249 
250         /* subtract bytes preallocated for the format string
251            (ex: 2 for "%s") */
252         writer.min_size -= (f - p + 1);
253 
254         switch (*f) {
255         case 'c':
256         {
257             int c = va_arg(vargs, int);
258             if (c < 0 || c > 255) {
259                 PyErr_SetString(PyExc_OverflowError,
260                                 "PyBytes_FromFormatV(): %c format "
261                                 "expects an integer in range [0; 255]");
262                 goto error;
263             }
264             writer.min_size++;
265             *s++ = (unsigned char)c;
266             break;
267         }
268 
269         case 'd':
270             if (longflag)
271                 sprintf(buffer, "%ld", va_arg(vargs, long));
272             else if (size_tflag)
273                 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
274                     va_arg(vargs, Py_ssize_t));
275             else
276                 sprintf(buffer, "%d", va_arg(vargs, int));
277             assert(strlen(buffer) < sizeof(buffer));
278             WRITE_BYTES(buffer);
279             break;
280 
281         case 'u':
282             if (longflag)
283                 sprintf(buffer, "%lu",
284                     va_arg(vargs, unsigned long));
285             else if (size_tflag)
286                 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
287                     va_arg(vargs, size_t));
288             else
289                 sprintf(buffer, "%u",
290                     va_arg(vargs, unsigned int));
291             assert(strlen(buffer) < sizeof(buffer));
292             WRITE_BYTES(buffer);
293             break;
294 
295         case 'i':
296             sprintf(buffer, "%i", va_arg(vargs, int));
297             assert(strlen(buffer) < sizeof(buffer));
298             WRITE_BYTES(buffer);
299             break;
300 
301         case 'x':
302             sprintf(buffer, "%x", va_arg(vargs, int));
303             assert(strlen(buffer) < sizeof(buffer));
304             WRITE_BYTES(buffer);
305             break;
306 
307         case 's':
308         {
309             Py_ssize_t i;
310 
311             p = va_arg(vargs, const char*);
312             i = strlen(p);
313             if (prec > 0 && i > prec)
314                 i = prec;
315             s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
316             if (s == NULL)
317                 goto error;
318             break;
319         }
320 
321         case 'p':
322             sprintf(buffer, "%p", va_arg(vargs, void*));
323             assert(strlen(buffer) < sizeof(buffer));
324             /* %p is ill-defined:  ensure leading 0x. */
325             if (buffer[1] == 'X')
326                 buffer[1] = 'x';
327             else if (buffer[1] != 'x') {
328                 memmove(buffer+2, buffer, strlen(buffer)+1);
329                 buffer[0] = '0';
330                 buffer[1] = 'x';
331             }
332             WRITE_BYTES(buffer);
333             break;
334 
335         case '%':
336             writer.min_size++;
337             *s++ = '%';
338             break;
339 
340         default:
341             if (*f == 0) {
342                 /* fix min_size if we reached the end of the format string */
343                 writer.min_size++;
344             }
345 
346             /* invalid format string: copy unformatted string and exit */
347             WRITE_BYTES(p);
348             return _PyBytesWriter_Finish(&writer, s);
349         }
350     }
351 
352 #undef WRITE_BYTES
353 
354     return _PyBytesWriter_Finish(&writer, s);
355 
356  error:
357     _PyBytesWriter_Dealloc(&writer);
358     return NULL;
359 }
360 
361 PyObject *
PyBytes_FromFormat(const char * format,...)362 PyBytes_FromFormat(const char *format, ...)
363 {
364     PyObject* ret;
365     va_list vargs;
366 
367 #ifdef HAVE_STDARG_PROTOTYPES
368     va_start(vargs, format);
369 #else
370     va_start(vargs);
371 #endif
372     ret = PyBytes_FromFormatV(format, vargs);
373     va_end(vargs);
374     return ret;
375 }
376 
377 /* Helpers for formatstring */
378 
379 Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject * args,Py_ssize_t arglen,Py_ssize_t * p_argidx)380 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
381 {
382     Py_ssize_t argidx = *p_argidx;
383     if (argidx < arglen) {
384         (*p_argidx)++;
385         if (arglen < 0)
386             return args;
387         else
388             return PyTuple_GetItem(args, argidx);
389     }
390     PyErr_SetString(PyExc_TypeError,
391                     "not enough arguments for format string");
392     return NULL;
393 }
394 
395 /* Format codes
396  * F_LJUST      '-'
397  * F_SIGN       '+'
398  * F_BLANK      ' '
399  * F_ALT        '#'
400  * F_ZERO       '0'
401  */
402 #define F_LJUST (1<<0)
403 #define F_SIGN  (1<<1)
404 #define F_BLANK (1<<2)
405 #define F_ALT   (1<<3)
406 #define F_ZERO  (1<<4)
407 
408 /* Returns a new reference to a PyBytes object, or NULL on failure. */
409 
410 static char*
formatfloat(PyObject * v,int flags,int prec,int type,PyObject ** p_result,_PyBytesWriter * writer,char * str)411 formatfloat(PyObject *v, int flags, int prec, int type,
412             PyObject **p_result, _PyBytesWriter *writer, char *str)
413 {
414     char *p;
415     PyObject *result;
416     double x;
417     size_t len;
418 
419     x = PyFloat_AsDouble(v);
420     if (x == -1.0 && PyErr_Occurred()) {
421         PyErr_Format(PyExc_TypeError, "float argument required, "
422                      "not %.200s", Py_TYPE(v)->tp_name);
423         return NULL;
424     }
425 
426     if (prec < 0)
427         prec = 6;
428 
429     p = PyOS_double_to_string(x, type, prec,
430                               (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
431 
432     if (p == NULL)
433         return NULL;
434 
435     len = strlen(p);
436     if (writer != NULL) {
437         str = _PyBytesWriter_Prepare(writer, str, len);
438         if (str == NULL)
439             return NULL;
440         memcpy(str, p, len);
441         PyMem_Free(p);
442         str += len;
443         return str;
444     }
445 
446     result = PyBytes_FromStringAndSize(p, len);
447     PyMem_Free(p);
448     *p_result = result;
449     return str;
450 }
451 
452 static PyObject *
formatlong(PyObject * v,int flags,int prec,int type)453 formatlong(PyObject *v, int flags, int prec, int type)
454 {
455     PyObject *result, *iobj;
456     if (type == 'i')
457         type = 'd';
458     if (PyLong_Check(v))
459         return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
460     if (PyNumber_Check(v)) {
461         /* make sure number is a type of integer for o, x, and X */
462         if (type == 'o' || type == 'x' || type == 'X')
463             iobj = PyNumber_Index(v);
464         else
465             iobj = PyNumber_Long(v);
466         if (iobj == NULL) {
467             if (!PyErr_ExceptionMatches(PyExc_TypeError))
468                 return NULL;
469         }
470         else if (!PyLong_Check(iobj))
471             Py_CLEAR(iobj);
472         if (iobj != NULL) {
473             result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
474             Py_DECREF(iobj);
475             return result;
476         }
477     }
478     PyErr_Format(PyExc_TypeError,
479         "%%%c format: %s is required, not %.200s", type,
480         (type == 'o' || type == 'x' || type == 'X') ? "an integer"
481                                                     : "a number",
482         Py_TYPE(v)->tp_name);
483     return NULL;
484 }
485 
486 static int
byte_converter(PyObject * arg,char * p)487 byte_converter(PyObject *arg, char *p)
488 {
489     if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
490         *p = PyBytes_AS_STRING(arg)[0];
491         return 1;
492     }
493     else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
494         *p = PyByteArray_AS_STRING(arg)[0];
495         return 1;
496     }
497     else {
498         PyObject *iobj;
499         long ival;
500         int overflow;
501         /* make sure number is a type of integer */
502         if (PyLong_Check(arg)) {
503             ival = PyLong_AsLongAndOverflow(arg, &overflow);
504         }
505         else {
506             iobj = PyNumber_Index(arg);
507             if (iobj == NULL) {
508                 if (!PyErr_ExceptionMatches(PyExc_TypeError))
509                     return 0;
510                 goto onError;
511             }
512             ival = PyLong_AsLongAndOverflow(iobj, &overflow);
513             Py_DECREF(iobj);
514         }
515         if (!overflow && ival == -1 && PyErr_Occurred())
516             goto onError;
517         if (overflow || !(0 <= ival && ival <= 255)) {
518             PyErr_SetString(PyExc_OverflowError,
519                             "%c arg not in range(256)");
520             return 0;
521         }
522         *p = (char)ival;
523         return 1;
524     }
525   onError:
526     PyErr_SetString(PyExc_TypeError,
527         "%c requires an integer in range(256) or a single byte");
528     return 0;
529 }
530 
531 static PyObject *
format_obj(PyObject * v,const char ** pbuf,Py_ssize_t * plen)532 format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
533 {
534     PyObject *func, *result;
535     _Py_IDENTIFIER(__bytes__);
536     /* is it a bytes object? */
537     if (PyBytes_Check(v)) {
538         *pbuf = PyBytes_AS_STRING(v);
539         *plen = PyBytes_GET_SIZE(v);
540         Py_INCREF(v);
541         return v;
542     }
543     if (PyByteArray_Check(v)) {
544         *pbuf = PyByteArray_AS_STRING(v);
545         *plen = PyByteArray_GET_SIZE(v);
546         Py_INCREF(v);
547         return v;
548     }
549     /* does it support __bytes__? */
550     func = _PyObject_LookupSpecial(v, &PyId___bytes__);
551     if (func != NULL) {
552         result = PyObject_CallFunctionObjArgs(func, NULL);
553         Py_DECREF(func);
554         if (result == NULL)
555             return NULL;
556         if (!PyBytes_Check(result)) {
557             PyErr_Format(PyExc_TypeError,
558                          "__bytes__ returned non-bytes (type %.200s)",
559                          Py_TYPE(result)->tp_name);
560             Py_DECREF(result);
561             return NULL;
562         }
563         *pbuf = PyBytes_AS_STRING(result);
564         *plen = PyBytes_GET_SIZE(result);
565         return result;
566     }
567     PyErr_Format(PyExc_TypeError,
568                  "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
569                  Py_TYPE(v)->tp_name);
570     return NULL;
571 }
572 
573 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
574 
575 PyObject *
_PyBytes_FormatEx(const char * format,Py_ssize_t format_len,PyObject * args,int use_bytearray)576 _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
577                   PyObject *args, int use_bytearray)
578 {
579     const char *fmt;
580     char *res;
581     Py_ssize_t arglen, argidx;
582     Py_ssize_t fmtcnt;
583     int args_owned = 0;
584     PyObject *dict = NULL;
585     _PyBytesWriter writer;
586 
587     if (args == NULL) {
588         PyErr_BadInternalCall();
589         return NULL;
590     }
591     fmt = format;
592     fmtcnt = format_len;
593 
594     _PyBytesWriter_Init(&writer);
595     writer.use_bytearray = use_bytearray;
596 
597     res = _PyBytesWriter_Alloc(&writer, fmtcnt);
598     if (res == NULL)
599         return NULL;
600     if (!use_bytearray)
601         writer.overallocate = 1;
602 
603     if (PyTuple_Check(args)) {
604         arglen = PyTuple_GET_SIZE(args);
605         argidx = 0;
606     }
607     else {
608         arglen = -1;
609         argidx = -2;
610     }
611     if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
612         !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
613         !PyByteArray_Check(args)) {
614             dict = args;
615     }
616 
617     while (--fmtcnt >= 0) {
618         if (*fmt != '%') {
619             Py_ssize_t len;
620             char *pos;
621 
622             pos = strchr(fmt + 1, '%');
623             if (pos != NULL)
624                 len = pos - fmt;
625             else
626                 len = format_len - (fmt - format);
627             assert(len != 0);
628 
629             memcpy(res, fmt, len);
630             res += len;
631             fmt += len;
632             fmtcnt -= (len - 1);
633         }
634         else {
635             /* Got a format specifier */
636             int flags = 0;
637             Py_ssize_t width = -1;
638             int prec = -1;
639             int c = '\0';
640             int fill;
641             PyObject *v = NULL;
642             PyObject *temp = NULL;
643             const char *pbuf = NULL;
644             int sign;
645             Py_ssize_t len = 0;
646             char onechar; /* For byte_converter() */
647             Py_ssize_t alloc;
648 #ifdef Py_DEBUG
649             char *before;
650 #endif
651 
652             fmt++;
653             if (*fmt == '(') {
654                 const char *keystart;
655                 Py_ssize_t keylen;
656                 PyObject *key;
657                 int pcount = 1;
658 
659                 if (dict == NULL) {
660                     PyErr_SetString(PyExc_TypeError,
661                              "format requires a mapping");
662                     goto error;
663                 }
664                 ++fmt;
665                 --fmtcnt;
666                 keystart = fmt;
667                 /* Skip over balanced parentheses */
668                 while (pcount > 0 && --fmtcnt >= 0) {
669                     if (*fmt == ')')
670                         --pcount;
671                     else if (*fmt == '(')
672                         ++pcount;
673                     fmt++;
674                 }
675                 keylen = fmt - keystart - 1;
676                 if (fmtcnt < 0 || pcount > 0) {
677                     PyErr_SetString(PyExc_ValueError,
678                                "incomplete format key");
679                     goto error;
680                 }
681                 key = PyBytes_FromStringAndSize(keystart,
682                                                  keylen);
683                 if (key == NULL)
684                     goto error;
685                 if (args_owned) {
686                     Py_DECREF(args);
687                     args_owned = 0;
688                 }
689                 args = PyObject_GetItem(dict, key);
690                 Py_DECREF(key);
691                 if (args == NULL) {
692                     goto error;
693                 }
694                 args_owned = 1;
695                 arglen = -1;
696                 argidx = -2;
697             }
698 
699             /* Parse flags. Example: "%+i" => flags=F_SIGN. */
700             while (--fmtcnt >= 0) {
701                 switch (c = *fmt++) {
702                 case '-': flags |= F_LJUST; continue;
703                 case '+': flags |= F_SIGN; continue;
704                 case ' ': flags |= F_BLANK; continue;
705                 case '#': flags |= F_ALT; continue;
706                 case '0': flags |= F_ZERO; continue;
707                 }
708                 break;
709             }
710 
711             /* Parse width. Example: "%10s" => width=10 */
712             if (c == '*') {
713                 v = getnextarg(args, arglen, &argidx);
714                 if (v == NULL)
715                     goto error;
716                 if (!PyLong_Check(v)) {
717                     PyErr_SetString(PyExc_TypeError,
718                                     "* wants int");
719                     goto error;
720                 }
721                 width = PyLong_AsSsize_t(v);
722                 if (width == -1 && PyErr_Occurred())
723                     goto error;
724                 if (width < 0) {
725                     flags |= F_LJUST;
726                     width = -width;
727                 }
728                 if (--fmtcnt >= 0)
729                     c = *fmt++;
730             }
731             else if (c >= 0 && isdigit(c)) {
732                 width = c - '0';
733                 while (--fmtcnt >= 0) {
734                     c = Py_CHARMASK(*fmt++);
735                     if (!isdigit(c))
736                         break;
737                     if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
738                         PyErr_SetString(
739                             PyExc_ValueError,
740                             "width too big");
741                         goto error;
742                     }
743                     width = width*10 + (c - '0');
744                 }
745             }
746 
747             /* Parse precision. Example: "%.3f" => prec=3 */
748             if (c == '.') {
749                 prec = 0;
750                 if (--fmtcnt >= 0)
751                     c = *fmt++;
752                 if (c == '*') {
753                     v = getnextarg(args, arglen, &argidx);
754                     if (v == NULL)
755                         goto error;
756                     if (!PyLong_Check(v)) {
757                         PyErr_SetString(
758                             PyExc_TypeError,
759                             "* wants int");
760                         goto error;
761                     }
762                     prec = _PyLong_AsInt(v);
763                     if (prec == -1 && PyErr_Occurred())
764                         goto error;
765                     if (prec < 0)
766                         prec = 0;
767                     if (--fmtcnt >= 0)
768                         c = *fmt++;
769                 }
770                 else if (c >= 0 && isdigit(c)) {
771                     prec = c - '0';
772                     while (--fmtcnt >= 0) {
773                         c = Py_CHARMASK(*fmt++);
774                         if (!isdigit(c))
775                             break;
776                         if (prec > (INT_MAX - ((int)c - '0')) / 10) {
777                             PyErr_SetString(
778                                 PyExc_ValueError,
779                                 "prec too big");
780                             goto error;
781                         }
782                         prec = prec*10 + (c - '0');
783                     }
784                 }
785             } /* prec */
786             if (fmtcnt >= 0) {
787                 if (c == 'h' || c == 'l' || c == 'L') {
788                     if (--fmtcnt >= 0)
789                         c = *fmt++;
790                 }
791             }
792             if (fmtcnt < 0) {
793                 PyErr_SetString(PyExc_ValueError,
794                                 "incomplete format");
795                 goto error;
796             }
797             if (c != '%') {
798                 v = getnextarg(args, arglen, &argidx);
799                 if (v == NULL)
800                     goto error;
801             }
802 
803             if (fmtcnt < 0) {
804                 /* last writer: disable writer overallocation */
805                 writer.overallocate = 0;
806             }
807 
808             sign = 0;
809             fill = ' ';
810             switch (c) {
811             case '%':
812                 *res++ = '%';
813                 continue;
814 
815             case 'r':
816                 // %r is only for 2/3 code; 3 only code should use %a
817             case 'a':
818                 temp = PyObject_ASCII(v);
819                 if (temp == NULL)
820                     goto error;
821                 assert(PyUnicode_IS_ASCII(temp));
822                 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
823                 len = PyUnicode_GET_LENGTH(temp);
824                 if (prec >= 0 && len > prec)
825                     len = prec;
826                 break;
827 
828             case 's':
829                 // %s is only for 2/3 code; 3 only code should use %b
830             case 'b':
831                 temp = format_obj(v, &pbuf, &len);
832                 if (temp == NULL)
833                     goto error;
834                 if (prec >= 0 && len > prec)
835                     len = prec;
836                 break;
837 
838             case 'i':
839             case 'd':
840             case 'u':
841             case 'o':
842             case 'x':
843             case 'X':
844                 if (PyLong_CheckExact(v)
845                     && width == -1 && prec == -1
846                     && !(flags & (F_SIGN | F_BLANK))
847                     && c != 'X')
848                 {
849                     /* Fast path */
850                     int alternate = flags & F_ALT;
851                     int base;
852 
853                     switch(c)
854                     {
855                         default:
856                             assert(0 && "'type' not in [diuoxX]");
857                         case 'd':
858                         case 'i':
859                         case 'u':
860                             base = 10;
861                             break;
862                         case 'o':
863                             base = 8;
864                             break;
865                         case 'x':
866                         case 'X':
867                             base = 16;
868                             break;
869                     }
870 
871                     /* Fast path */
872                     writer.min_size -= 2; /* size preallocated for "%d" */
873                     res = _PyLong_FormatBytesWriter(&writer, res,
874                                                     v, base, alternate);
875                     if (res == NULL)
876                         goto error;
877                     continue;
878                 }
879 
880                 temp = formatlong(v, flags, prec, c);
881                 if (!temp)
882                     goto error;
883                 assert(PyUnicode_IS_ASCII(temp));
884                 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
885                 len = PyUnicode_GET_LENGTH(temp);
886                 sign = 1;
887                 if (flags & F_ZERO)
888                     fill = '0';
889                 break;
890 
891             case 'e':
892             case 'E':
893             case 'f':
894             case 'F':
895             case 'g':
896             case 'G':
897                 if (width == -1 && prec == -1
898                     && !(flags & (F_SIGN | F_BLANK)))
899                 {
900                     /* Fast path */
901                     writer.min_size -= 2; /* size preallocated for "%f" */
902                     res = formatfloat(v, flags, prec, c, NULL, &writer, res);
903                     if (res == NULL)
904                         goto error;
905                     continue;
906                 }
907 
908                 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
909                     goto error;
910                 pbuf = PyBytes_AS_STRING(temp);
911                 len = PyBytes_GET_SIZE(temp);
912                 sign = 1;
913                 if (flags & F_ZERO)
914                     fill = '0';
915                 break;
916 
917             case 'c':
918                 pbuf = &onechar;
919                 len = byte_converter(v, &onechar);
920                 if (!len)
921                     goto error;
922                 if (width == -1) {
923                     /* Fast path */
924                     *res++ = onechar;
925                     continue;
926                 }
927                 break;
928 
929             default:
930                 PyErr_Format(PyExc_ValueError,
931                   "unsupported format character '%c' (0x%x) "
932                   "at index %zd",
933                   c, c,
934                   (Py_ssize_t)(fmt - 1 - format));
935                 goto error;
936             }
937 
938             if (sign) {
939                 if (*pbuf == '-' || *pbuf == '+') {
940                     sign = *pbuf++;
941                     len--;
942                 }
943                 else if (flags & F_SIGN)
944                     sign = '+';
945                 else if (flags & F_BLANK)
946                     sign = ' ';
947                 else
948                     sign = 0;
949             }
950             if (width < len)
951                 width = len;
952 
953             alloc = width;
954             if (sign != 0 && len == width)
955                 alloc++;
956             /* 2: size preallocated for %s */
957             if (alloc > 2) {
958                 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
959                 if (res == NULL)
960                     goto error;
961             }
962 #ifdef Py_DEBUG
963             before = res;
964 #endif
965 
966             /* Write the sign if needed */
967             if (sign) {
968                 if (fill != ' ')
969                     *res++ = sign;
970                 if (width > len)
971                     width--;
972             }
973 
974             /* Write the numeric prefix for "x", "X" and "o" formats
975                if the alternate form is used.
976                For example, write "0x" for the "%#x" format. */
977             if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
978                 assert(pbuf[0] == '0');
979                 assert(pbuf[1] == c);
980                 if (fill != ' ') {
981                     *res++ = *pbuf++;
982                     *res++ = *pbuf++;
983                 }
984                 width -= 2;
985                 if (width < 0)
986                     width = 0;
987                 len -= 2;
988             }
989 
990             /* Pad left with the fill character if needed */
991             if (width > len && !(flags & F_LJUST)) {
992                 memset(res, fill, width - len);
993                 res += (width - len);
994                 width = len;
995             }
996 
997             /* If padding with spaces: write sign if needed and/or numeric
998                prefix if the alternate form is used */
999             if (fill == ' ') {
1000                 if (sign)
1001                     *res++ = sign;
1002                 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1003                     assert(pbuf[0] == '0');
1004                     assert(pbuf[1] == c);
1005                     *res++ = *pbuf++;
1006                     *res++ = *pbuf++;
1007                 }
1008             }
1009 
1010             /* Copy bytes */
1011             memcpy(res, pbuf, len);
1012             res += len;
1013 
1014             /* Pad right with the fill character if needed */
1015             if (width > len) {
1016                 memset(res, ' ', width - len);
1017                 res += (width - len);
1018             }
1019 
1020             if (dict && (argidx < arglen) && c != '%') {
1021                 PyErr_SetString(PyExc_TypeError,
1022                            "not all arguments converted during bytes formatting");
1023                 Py_XDECREF(temp);
1024                 goto error;
1025             }
1026             Py_XDECREF(temp);
1027 
1028 #ifdef Py_DEBUG
1029             /* check that we computed the exact size for this write */
1030             assert((res - before) == alloc);
1031 #endif
1032         } /* '%' */
1033 
1034         /* If overallocation was disabled, ensure that it was the last
1035            write. Otherwise, we missed an optimization */
1036         assert(writer.overallocate || fmtcnt < 0 || use_bytearray);
1037     } /* until end */
1038 
1039     if (argidx < arglen && !dict) {
1040         PyErr_SetString(PyExc_TypeError,
1041                         "not all arguments converted during bytes formatting");
1042         goto error;
1043     }
1044 
1045     if (args_owned) {
1046         Py_DECREF(args);
1047     }
1048     return _PyBytesWriter_Finish(&writer, res);
1049 
1050  error:
1051     _PyBytesWriter_Dealloc(&writer);
1052     if (args_owned) {
1053         Py_DECREF(args);
1054     }
1055     return NULL;
1056 }
1057 
1058 /* =-= */
1059 
1060 static void
bytes_dealloc(PyObject * op)1061 bytes_dealloc(PyObject *op)
1062 {
1063     Py_TYPE(op)->tp_free(op);
1064 }
1065 
1066 /* Unescape a backslash-escaped string. If unicode is non-zero,
1067    the string is a u-literal. If recode_encoding is non-zero,
1068    the string is UTF-8 encoded and should be re-encoded in the
1069    specified encoding.  */
1070 
1071 static char *
_PyBytes_DecodeEscapeRecode(const char ** s,const char * end,const char * errors,const char * recode_encoding,_PyBytesWriter * writer,char * p)1072 _PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1073                             const char *errors, const char *recode_encoding,
1074                             _PyBytesWriter *writer, char *p)
1075 {
1076     PyObject *u, *w;
1077     const char* t;
1078 
1079     t = *s;
1080     /* Decode non-ASCII bytes as UTF-8. */
1081     while (t < end && (*t & 0x80))
1082         t++;
1083     u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1084     if (u == NULL)
1085         return NULL;
1086 
1087     /* Recode them in target encoding. */
1088     w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1089     Py_DECREF(u);
1090     if  (w == NULL)
1091         return NULL;
1092     assert(PyBytes_Check(w));
1093 
1094     /* Append bytes to output buffer. */
1095     writer->min_size--;   /* subtract 1 preallocated byte */
1096     p = _PyBytesWriter_WriteBytes(writer, p,
1097                                   PyBytes_AS_STRING(w),
1098                                   PyBytes_GET_SIZE(w));
1099     Py_DECREF(w);
1100     if (p == NULL)
1101         return NULL;
1102 
1103     *s = t;
1104     return p;
1105 }
1106 
_PyBytes_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,Py_ssize_t unicode,const char * recode_encoding,const char ** first_invalid_escape)1107 PyObject *_PyBytes_DecodeEscape(const char *s,
1108                                 Py_ssize_t len,
1109                                 const char *errors,
1110                                 Py_ssize_t unicode,
1111                                 const char *recode_encoding,
1112                                 const char **first_invalid_escape)
1113 {
1114     int c;
1115     char *p;
1116     const char *end;
1117     _PyBytesWriter writer;
1118 
1119     _PyBytesWriter_Init(&writer);
1120 
1121     p = _PyBytesWriter_Alloc(&writer, len);
1122     if (p == NULL)
1123         return NULL;
1124     writer.overallocate = 1;
1125 
1126     *first_invalid_escape = NULL;
1127 
1128     end = s + len;
1129     while (s < end) {
1130         if (*s != '\\') {
1131           non_esc:
1132             if (!(recode_encoding && (*s & 0x80))) {
1133                 *p++ = *s++;
1134             }
1135             else {
1136                 /* non-ASCII character and need to recode */
1137                 p = _PyBytes_DecodeEscapeRecode(&s, end,
1138                                                 errors, recode_encoding,
1139                                                 &writer, p);
1140                 if (p == NULL)
1141                     goto failed;
1142             }
1143             continue;
1144         }
1145 
1146         s++;
1147         if (s == end) {
1148             PyErr_SetString(PyExc_ValueError,
1149                             "Trailing \\ in string");
1150             goto failed;
1151         }
1152 
1153         switch (*s++) {
1154         /* XXX This assumes ASCII! */
1155         case '\n': break;
1156         case '\\': *p++ = '\\'; break;
1157         case '\'': *p++ = '\''; break;
1158         case '\"': *p++ = '\"'; break;
1159         case 'b': *p++ = '\b'; break;
1160         case 'f': *p++ = '\014'; break; /* FF */
1161         case 't': *p++ = '\t'; break;
1162         case 'n': *p++ = '\n'; break;
1163         case 'r': *p++ = '\r'; break;
1164         case 'v': *p++ = '\013'; break; /* VT */
1165         case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1166         case '0': case '1': case '2': case '3':
1167         case '4': case '5': case '6': case '7':
1168             c = s[-1] - '0';
1169             if (s < end && '0' <= *s && *s <= '7') {
1170                 c = (c<<3) + *s++ - '0';
1171                 if (s < end && '0' <= *s && *s <= '7')
1172                     c = (c<<3) + *s++ - '0';
1173             }
1174             *p++ = c;
1175             break;
1176         case 'x':
1177             if (s+1 < end) {
1178                 int digit1, digit2;
1179                 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1180                 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1181                 if (digit1 < 16 && digit2 < 16) {
1182                     *p++ = (unsigned char)((digit1 << 4) + digit2);
1183                     s += 2;
1184                     break;
1185                 }
1186             }
1187             /* invalid hexadecimal digits */
1188 
1189             if (!errors || strcmp(errors, "strict") == 0) {
1190                 PyErr_Format(PyExc_ValueError,
1191                              "invalid \\x escape at position %d",
1192                              s - 2 - (end - len));
1193                 goto failed;
1194             }
1195             if (strcmp(errors, "replace") == 0) {
1196                 *p++ = '?';
1197             } else if (strcmp(errors, "ignore") == 0)
1198                 /* do nothing */;
1199             else {
1200                 PyErr_Format(PyExc_ValueError,
1201                              "decoding error; unknown "
1202                              "error handling code: %.400s",
1203                              errors);
1204                 goto failed;
1205             }
1206             /* skip \x */
1207             if (s < end && Py_ISXDIGIT(s[0]))
1208                 s++; /* and a hexdigit */
1209             break;
1210 
1211         default:
1212             if (*first_invalid_escape == NULL) {
1213                 *first_invalid_escape = s-1; /* Back up one char, since we've
1214                                                 already incremented s. */
1215             }
1216             *p++ = '\\';
1217             s--;
1218             goto non_esc; /* an arbitrary number of unescaped
1219                              UTF-8 bytes may follow. */
1220         }
1221     }
1222 
1223     return _PyBytesWriter_Finish(&writer, p);
1224 
1225   failed:
1226     _PyBytesWriter_Dealloc(&writer);
1227     return NULL;
1228 }
1229 
PyBytes_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,Py_ssize_t unicode,const char * recode_encoding)1230 PyObject *PyBytes_DecodeEscape(const char *s,
1231                                 Py_ssize_t len,
1232                                 const char *errors,
1233                                 Py_ssize_t unicode,
1234                                 const char *recode_encoding)
1235 {
1236     const char* first_invalid_escape;
1237     PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
1238                                              recode_encoding,
1239                                              &first_invalid_escape);
1240     if (result == NULL)
1241         return NULL;
1242     if (first_invalid_escape != NULL) {
1243         if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1244                              "invalid escape sequence '\\%c'",
1245                              *first_invalid_escape) < 0) {
1246             Py_DECREF(result);
1247             return NULL;
1248         }
1249     }
1250     return result;
1251 
1252 }
1253 /* -------------------------------------------------------------------- */
1254 /* object api */
1255 
1256 Py_ssize_t
PyBytes_Size(PyObject * op)1257 PyBytes_Size(PyObject *op)
1258 {
1259     if (!PyBytes_Check(op)) {
1260         PyErr_Format(PyExc_TypeError,
1261              "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1262         return -1;
1263     }
1264     return Py_SIZE(op);
1265 }
1266 
1267 char *
PyBytes_AsString(PyObject * op)1268 PyBytes_AsString(PyObject *op)
1269 {
1270     if (!PyBytes_Check(op)) {
1271         PyErr_Format(PyExc_TypeError,
1272              "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1273         return NULL;
1274     }
1275     return ((PyBytesObject *)op)->ob_sval;
1276 }
1277 
1278 int
PyBytes_AsStringAndSize(PyObject * obj,char ** s,Py_ssize_t * len)1279 PyBytes_AsStringAndSize(PyObject *obj,
1280                          char **s,
1281                          Py_ssize_t *len)
1282 {
1283     if (s == NULL) {
1284         PyErr_BadInternalCall();
1285         return -1;
1286     }
1287 
1288     if (!PyBytes_Check(obj)) {
1289         PyErr_Format(PyExc_TypeError,
1290              "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1291         return -1;
1292     }
1293 
1294     *s = PyBytes_AS_STRING(obj);
1295     if (len != NULL)
1296         *len = PyBytes_GET_SIZE(obj);
1297     else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1298         PyErr_SetString(PyExc_ValueError,
1299                         "embedded null byte");
1300         return -1;
1301     }
1302     return 0;
1303 }
1304 
1305 /* -------------------------------------------------------------------- */
1306 /* Methods */
1307 
1308 #include "stringlib/stringdefs.h"
1309 
1310 #include "stringlib/fastsearch.h"
1311 #include "stringlib/count.h"
1312 #include "stringlib/find.h"
1313 #include "stringlib/join.h"
1314 #include "stringlib/partition.h"
1315 #include "stringlib/split.h"
1316 #include "stringlib/ctype.h"
1317 
1318 #include "stringlib/transmogrify.h"
1319 
1320 PyObject *
PyBytes_Repr(PyObject * obj,int smartquotes)1321 PyBytes_Repr(PyObject *obj, int smartquotes)
1322 {
1323     PyBytesObject* op = (PyBytesObject*) obj;
1324     Py_ssize_t i, length = Py_SIZE(op);
1325     Py_ssize_t newsize, squotes, dquotes;
1326     PyObject *v;
1327     unsigned char quote, *s, *p;
1328 
1329     /* Compute size of output string */
1330     squotes = dquotes = 0;
1331     newsize = 3; /* b'' */
1332     s = (unsigned char*)op->ob_sval;
1333     for (i = 0; i < length; i++) {
1334         Py_ssize_t incr = 1;
1335         switch(s[i]) {
1336         case '\'': squotes++; break;
1337         case '"':  dquotes++; break;
1338         case '\\': case '\t': case '\n': case '\r':
1339             incr = 2; break; /* \C */
1340         default:
1341             if (s[i] < ' ' || s[i] >= 0x7f)
1342                 incr = 4; /* \xHH */
1343         }
1344         if (newsize > PY_SSIZE_T_MAX - incr)
1345             goto overflow;
1346         newsize += incr;
1347     }
1348     quote = '\'';
1349     if (smartquotes && squotes && !dquotes)
1350         quote = '"';
1351     if (squotes && quote == '\'') {
1352         if (newsize > PY_SSIZE_T_MAX - squotes)
1353             goto overflow;
1354         newsize += squotes;
1355     }
1356 
1357     v = PyUnicode_New(newsize, 127);
1358     if (v == NULL) {
1359         return NULL;
1360     }
1361     p = PyUnicode_1BYTE_DATA(v);
1362 
1363     *p++ = 'b', *p++ = quote;
1364     for (i = 0; i < length; i++) {
1365         unsigned char c = op->ob_sval[i];
1366         if (c == quote || c == '\\')
1367             *p++ = '\\', *p++ = c;
1368         else if (c == '\t')
1369             *p++ = '\\', *p++ = 't';
1370         else if (c == '\n')
1371             *p++ = '\\', *p++ = 'n';
1372         else if (c == '\r')
1373             *p++ = '\\', *p++ = 'r';
1374         else if (c < ' ' || c >= 0x7f) {
1375             *p++ = '\\';
1376             *p++ = 'x';
1377             *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1378             *p++ = Py_hexdigits[c & 0xf];
1379         }
1380         else
1381             *p++ = c;
1382     }
1383     *p++ = quote;
1384     assert(_PyUnicode_CheckConsistency(v, 1));
1385     return v;
1386 
1387   overflow:
1388     PyErr_SetString(PyExc_OverflowError,
1389                     "bytes object is too large to make repr");
1390     return NULL;
1391 }
1392 
1393 static PyObject *
bytes_repr(PyObject * op)1394 bytes_repr(PyObject *op)
1395 {
1396     return PyBytes_Repr(op, 1);
1397 }
1398 
1399 static PyObject *
bytes_str(PyObject * op)1400 bytes_str(PyObject *op)
1401 {
1402     if (Py_BytesWarningFlag) {
1403         if (PyErr_WarnEx(PyExc_BytesWarning,
1404                          "str() on a bytes instance", 1))
1405             return NULL;
1406     }
1407     return bytes_repr(op);
1408 }
1409 
1410 static Py_ssize_t
bytes_length(PyBytesObject * a)1411 bytes_length(PyBytesObject *a)
1412 {
1413     return Py_SIZE(a);
1414 }
1415 
1416 /* This is also used by PyBytes_Concat() */
1417 static PyObject *
bytes_concat(PyObject * a,PyObject * b)1418 bytes_concat(PyObject *a, PyObject *b)
1419 {
1420     Py_buffer va, vb;
1421     PyObject *result = NULL;
1422 
1423     va.len = -1;
1424     vb.len = -1;
1425     if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1426         PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1427         PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1428                      Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1429         goto done;
1430     }
1431 
1432     /* Optimize end cases */
1433     if (va.len == 0 && PyBytes_CheckExact(b)) {
1434         result = b;
1435         Py_INCREF(result);
1436         goto done;
1437     }
1438     if (vb.len == 0 && PyBytes_CheckExact(a)) {
1439         result = a;
1440         Py_INCREF(result);
1441         goto done;
1442     }
1443 
1444     if (va.len > PY_SSIZE_T_MAX - vb.len) {
1445         PyErr_NoMemory();
1446         goto done;
1447     }
1448 
1449     result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1450     if (result != NULL) {
1451         memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1452         memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1453     }
1454 
1455   done:
1456     if (va.len != -1)
1457         PyBuffer_Release(&va);
1458     if (vb.len != -1)
1459         PyBuffer_Release(&vb);
1460     return result;
1461 }
1462 
1463 static PyObject *
bytes_repeat(PyBytesObject * a,Py_ssize_t n)1464 bytes_repeat(PyBytesObject *a, Py_ssize_t n)
1465 {
1466     Py_ssize_t i;
1467     Py_ssize_t j;
1468     Py_ssize_t size;
1469     PyBytesObject *op;
1470     size_t nbytes;
1471     if (n < 0)
1472         n = 0;
1473     /* watch out for overflows:  the size can overflow int,
1474      * and the # of bytes needed can overflow size_t
1475      */
1476     if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1477         PyErr_SetString(PyExc_OverflowError,
1478             "repeated bytes are too long");
1479         return NULL;
1480     }
1481     size = Py_SIZE(a) * n;
1482     if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1483         Py_INCREF(a);
1484         return (PyObject *)a;
1485     }
1486     nbytes = (size_t)size;
1487     if (nbytes + PyBytesObject_SIZE <= nbytes) {
1488         PyErr_SetString(PyExc_OverflowError,
1489             "repeated bytes are too long");
1490         return NULL;
1491     }
1492     op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1493     if (op == NULL)
1494         return PyErr_NoMemory();
1495     (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
1496     op->ob_shash = -1;
1497     op->ob_sval[size] = '\0';
1498     if (Py_SIZE(a) == 1 && n > 0) {
1499         memset(op->ob_sval, a->ob_sval[0] , n);
1500         return (PyObject *) op;
1501     }
1502     i = 0;
1503     if (i < size) {
1504         memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
1505         i = Py_SIZE(a);
1506     }
1507     while (i < size) {
1508         j = (i <= size-i)  ?  i  :  size-i;
1509         memcpy(op->ob_sval+i, op->ob_sval, j);
1510         i += j;
1511     }
1512     return (PyObject *) op;
1513 }
1514 
1515 static int
bytes_contains(PyObject * self,PyObject * arg)1516 bytes_contains(PyObject *self, PyObject *arg)
1517 {
1518     return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1519 }
1520 
1521 static PyObject *
bytes_item(PyBytesObject * a,Py_ssize_t i)1522 bytes_item(PyBytesObject *a, Py_ssize_t i)
1523 {
1524     if (i < 0 || i >= Py_SIZE(a)) {
1525         PyErr_SetString(PyExc_IndexError, "index out of range");
1526         return NULL;
1527     }
1528     return PyLong_FromLong((unsigned char)a->ob_sval[i]);
1529 }
1530 
1531 static int
bytes_compare_eq(PyBytesObject * a,PyBytesObject * b)1532 bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1533 {
1534     int cmp;
1535     Py_ssize_t len;
1536 
1537     len = Py_SIZE(a);
1538     if (Py_SIZE(b) != len)
1539         return 0;
1540 
1541     if (a->ob_sval[0] != b->ob_sval[0])
1542         return 0;
1543 
1544     cmp = memcmp(a->ob_sval, b->ob_sval, len);
1545     return (cmp == 0);
1546 }
1547 
1548 static PyObject*
bytes_richcompare(PyBytesObject * a,PyBytesObject * b,int op)1549 bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
1550 {
1551     int c;
1552     Py_ssize_t len_a, len_b;
1553     Py_ssize_t min_len;
1554     PyObject *result;
1555     int rc;
1556 
1557     /* Make sure both arguments are strings. */
1558     if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1559         if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
1560             rc = PyObject_IsInstance((PyObject*)a,
1561                                      (PyObject*)&PyUnicode_Type);
1562             if (!rc)
1563                 rc = PyObject_IsInstance((PyObject*)b,
1564                                          (PyObject*)&PyUnicode_Type);
1565             if (rc < 0)
1566                 return NULL;
1567             if (rc) {
1568                 if (PyErr_WarnEx(PyExc_BytesWarning,
1569                                  "Comparison between bytes and string", 1))
1570                     return NULL;
1571             }
1572             else {
1573                 rc = PyObject_IsInstance((PyObject*)a,
1574                                          (PyObject*)&PyLong_Type);
1575                 if (!rc)
1576                     rc = PyObject_IsInstance((PyObject*)b,
1577                                              (PyObject*)&PyLong_Type);
1578                 if (rc < 0)
1579                     return NULL;
1580                 if (rc) {
1581                     if (PyErr_WarnEx(PyExc_BytesWarning,
1582                                      "Comparison between bytes and int", 1))
1583                         return NULL;
1584                 }
1585             }
1586         }
1587         result = Py_NotImplemented;
1588     }
1589     else if (a == b) {
1590         switch (op) {
1591         case Py_EQ:
1592         case Py_LE:
1593         case Py_GE:
1594             /* a string is equal to itself */
1595             result = Py_True;
1596             break;
1597         case Py_NE:
1598         case Py_LT:
1599         case Py_GT:
1600             result = Py_False;
1601             break;
1602         default:
1603             PyErr_BadArgument();
1604             return NULL;
1605         }
1606     }
1607     else if (op == Py_EQ || op == Py_NE) {
1608         int eq = bytes_compare_eq(a, b);
1609         eq ^= (op == Py_NE);
1610         result = eq ? Py_True : Py_False;
1611     }
1612     else {
1613         len_a = Py_SIZE(a);
1614         len_b = Py_SIZE(b);
1615         min_len = Py_MIN(len_a, len_b);
1616         if (min_len > 0) {
1617             c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1618             if (c == 0)
1619                 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1620         }
1621         else
1622             c = 0;
1623         if (c == 0)
1624             c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1625         switch (op) {
1626         case Py_LT: c = c <  0; break;
1627         case Py_LE: c = c <= 0; break;
1628         case Py_GT: c = c >  0; break;
1629         case Py_GE: c = c >= 0; break;
1630         default:
1631             PyErr_BadArgument();
1632             return NULL;
1633         }
1634         result = c ? Py_True : Py_False;
1635     }
1636 
1637     Py_INCREF(result);
1638     return result;
1639 }
1640 
1641 static Py_hash_t
bytes_hash(PyBytesObject * a)1642 bytes_hash(PyBytesObject *a)
1643 {
1644     if (a->ob_shash == -1) {
1645         /* Can't fail */
1646         a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
1647     }
1648     return a->ob_shash;
1649 }
1650 
1651 static PyObject*
bytes_subscript(PyBytesObject * self,PyObject * item)1652 bytes_subscript(PyBytesObject* self, PyObject* item)
1653 {
1654     if (PyIndex_Check(item)) {
1655         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1656         if (i == -1 && PyErr_Occurred())
1657             return NULL;
1658         if (i < 0)
1659             i += PyBytes_GET_SIZE(self);
1660         if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1661             PyErr_SetString(PyExc_IndexError,
1662                             "index out of range");
1663             return NULL;
1664         }
1665         return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1666     }
1667     else if (PySlice_Check(item)) {
1668         Py_ssize_t start, stop, step, slicelength, cur, i;
1669         char* source_buf;
1670         char* result_buf;
1671         PyObject* result;
1672 
1673         if (PySlice_GetIndicesEx(item,
1674                          PyBytes_GET_SIZE(self),
1675                          &start, &stop, &step, &slicelength) < 0) {
1676             return NULL;
1677         }
1678 
1679         if (slicelength <= 0) {
1680             return PyBytes_FromStringAndSize("", 0);
1681         }
1682         else if (start == 0 && step == 1 &&
1683                  slicelength == PyBytes_GET_SIZE(self) &&
1684                  PyBytes_CheckExact(self)) {
1685             Py_INCREF(self);
1686             return (PyObject *)self;
1687         }
1688         else if (step == 1) {
1689             return PyBytes_FromStringAndSize(
1690                 PyBytes_AS_STRING(self) + start,
1691                 slicelength);
1692         }
1693         else {
1694             source_buf = PyBytes_AS_STRING(self);
1695             result = PyBytes_FromStringAndSize(NULL, slicelength);
1696             if (result == NULL)
1697                 return NULL;
1698 
1699             result_buf = PyBytes_AS_STRING(result);
1700             for (cur = start, i = 0; i < slicelength;
1701                  cur += step, i++) {
1702                 result_buf[i] = source_buf[cur];
1703             }
1704 
1705             return result;
1706         }
1707     }
1708     else {
1709         PyErr_Format(PyExc_TypeError,
1710                      "byte indices must be integers or slices, not %.200s",
1711                      Py_TYPE(item)->tp_name);
1712         return NULL;
1713     }
1714 }
1715 
1716 static int
bytes_buffer_getbuffer(PyBytesObject * self,Py_buffer * view,int flags)1717 bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
1718 {
1719     return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1720                              1, flags);
1721 }
1722 
1723 static PySequenceMethods bytes_as_sequence = {
1724     (lenfunc)bytes_length, /*sq_length*/
1725     (binaryfunc)bytes_concat, /*sq_concat*/
1726     (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1727     (ssizeargfunc)bytes_item, /*sq_item*/
1728     0,                  /*sq_slice*/
1729     0,                  /*sq_ass_item*/
1730     0,                  /*sq_ass_slice*/
1731     (objobjproc)bytes_contains /*sq_contains*/
1732 };
1733 
1734 static PyMappingMethods bytes_as_mapping = {
1735     (lenfunc)bytes_length,
1736     (binaryfunc)bytes_subscript,
1737     0,
1738 };
1739 
1740 static PyBufferProcs bytes_as_buffer = {
1741     (getbufferproc)bytes_buffer_getbuffer,
1742     NULL,
1743 };
1744 
1745 
1746 #define LEFTSTRIP 0
1747 #define RIGHTSTRIP 1
1748 #define BOTHSTRIP 2
1749 
1750 /*[clinic input]
1751 bytes.split
1752 
1753     sep: object = None
1754         The delimiter according which to split the bytes.
1755         None (the default value) means split on ASCII whitespace characters
1756         (space, tab, return, newline, formfeed, vertical tab).
1757     maxsplit: Py_ssize_t = -1
1758         Maximum number of splits to do.
1759         -1 (the default value) means no limit.
1760 
1761 Return a list of the sections in the bytes, using sep as the delimiter.
1762 [clinic start generated code]*/
1763 
1764 static PyObject *
bytes_split_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1765 bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1766 /*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1767 {
1768     Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1769     const char *s = PyBytes_AS_STRING(self), *sub;
1770     Py_buffer vsub;
1771     PyObject *list;
1772 
1773     if (maxsplit < 0)
1774         maxsplit = PY_SSIZE_T_MAX;
1775     if (sep == Py_None)
1776         return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1777     if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1778         return NULL;
1779     sub = vsub.buf;
1780     n = vsub.len;
1781 
1782     list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1783     PyBuffer_Release(&vsub);
1784     return list;
1785 }
1786 
1787 /*[clinic input]
1788 bytes.partition
1789 
1790     sep: Py_buffer
1791     /
1792 
1793 Partition the bytes into three parts using the given separator.
1794 
1795 This will search for the separator sep in the bytes. If the separator is found,
1796 returns a 3-tuple containing the part before the separator, the separator
1797 itself, and the part after it.
1798 
1799 If the separator is not found, returns a 3-tuple containing the original bytes
1800 object and two empty bytes objects.
1801 [clinic start generated code]*/
1802 
1803 static PyObject *
bytes_partition_impl(PyBytesObject * self,Py_buffer * sep)1804 bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1805 /*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
1806 {
1807     return stringlib_partition(
1808         (PyObject*) self,
1809         PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1810         sep->obj, (const char *)sep->buf, sep->len
1811         );
1812 }
1813 
1814 /*[clinic input]
1815 bytes.rpartition
1816 
1817     sep: Py_buffer
1818     /
1819 
1820 Partition the bytes into three parts using the given separator.
1821 
This will search for the separator sep in the bytes, starting at the end. If
1823 the separator is found, returns a 3-tuple containing the part before the
1824 separator, the separator itself, and the part after it.
1825 
1826 If the separator is not found, returns a 3-tuple containing two empty bytes
1827 objects and the original bytes object.
1828 [clinic start generated code]*/
1829 
1830 static PyObject *
bytes_rpartition_impl(PyBytesObject * self,Py_buffer * sep)1831 bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1832 /*[clinic end generated code: output=191b114cbb028e50 input=67f689e63a62d478]*/
1833 {
1834     return stringlib_rpartition(
1835         (PyObject*) self,
1836         PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1837         sep->obj, (const char *)sep->buf, sep->len
1838         );
1839 }
1840 
1841 /*[clinic input]
1842 bytes.rsplit = bytes.split
1843 
1844 Return a list of the sections in the bytes, using sep as the delimiter.
1845 
1846 Splitting is done starting at the end of the bytes and working to the front.
1847 [clinic start generated code]*/
1848 
1849 static PyObject *
bytes_rsplit_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1850 bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1851 /*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
1852 {
1853     Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1854     const char *s = PyBytes_AS_STRING(self), *sub;
1855     Py_buffer vsub;
1856     PyObject *list;
1857 
1858     if (maxsplit < 0)
1859         maxsplit = PY_SSIZE_T_MAX;
1860     if (sep == Py_None)
1861         return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1862     if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1863         return NULL;
1864     sub = vsub.buf;
1865     n = vsub.len;
1866 
1867     list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1868     PyBuffer_Release(&vsub);
1869     return list;
1870 }
1871 
1872 
1873 /*[clinic input]
1874 bytes.join
1875 
1876     iterable_of_bytes: object
1877     /
1878 
1879 Concatenate any number of bytes objects.
1880 
1881 The bytes whose method is called is inserted in between each pair.
1882 
1883 The result is returned as a new bytes object.
1884 
1885 Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1886 [clinic start generated code]*/
1887 
1888 static PyObject *
bytes_join(PyBytesObject * self,PyObject * iterable_of_bytes)1889 bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1890 /*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
1891 {
1892     return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1893 }
1894 
1895 PyObject *
_PyBytes_Join(PyObject * sep,PyObject * x)1896 _PyBytes_Join(PyObject *sep, PyObject *x)
1897 {
1898     assert(sep != NULL && PyBytes_Check(sep));
1899     assert(x != NULL);
1900     return bytes_join((PyBytesObject*)sep, x);
1901 }
1902 
1903 static PyObject *
bytes_find(PyBytesObject * self,PyObject * args)1904 bytes_find(PyBytesObject *self, PyObject *args)
1905 {
1906     return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1907 }
1908 
1909 static PyObject *
bytes_index(PyBytesObject * self,PyObject * args)1910 bytes_index(PyBytesObject *self, PyObject *args)
1911 {
1912     return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1913 }
1914 
1915 
1916 static PyObject *
bytes_rfind(PyBytesObject * self,PyObject * args)1917 bytes_rfind(PyBytesObject *self, PyObject *args)
1918 {
1919     return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1920 }
1921 
1922 
1923 static PyObject *
bytes_rindex(PyBytesObject * self,PyObject * args)1924 bytes_rindex(PyBytesObject *self, PyObject *args)
1925 {
1926     return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1927 }
1928 
1929 
1930 Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject * self,int striptype,PyObject * sepobj)1931 do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
1932 {
1933     Py_buffer vsep;
1934     char *s = PyBytes_AS_STRING(self);
1935     Py_ssize_t len = PyBytes_GET_SIZE(self);
1936     char *sep;
1937     Py_ssize_t seplen;
1938     Py_ssize_t i, j;
1939 
1940     if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
1941         return NULL;
1942     sep = vsep.buf;
1943     seplen = vsep.len;
1944 
1945     i = 0;
1946     if (striptype != RIGHTSTRIP) {
1947         while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1948             i++;
1949         }
1950     }
1951 
1952     j = len;
1953     if (striptype != LEFTSTRIP) {
1954         do {
1955             j--;
1956         } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1957         j++;
1958     }
1959 
1960     PyBuffer_Release(&vsep);
1961 
1962     if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1963         Py_INCREF(self);
1964         return (PyObject*)self;
1965     }
1966     else
1967         return PyBytes_FromStringAndSize(s+i, j-i);
1968 }
1969 
1970 
1971 Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject * self,int striptype)1972 do_strip(PyBytesObject *self, int striptype)
1973 {
1974     char *s = PyBytes_AS_STRING(self);
1975     Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1976 
1977     i = 0;
1978     if (striptype != RIGHTSTRIP) {
1979         while (i < len && Py_ISSPACE(s[i])) {
1980             i++;
1981         }
1982     }
1983 
1984     j = len;
1985     if (striptype != LEFTSTRIP) {
1986         do {
1987             j--;
1988         } while (j >= i && Py_ISSPACE(s[j]));
1989         j++;
1990     }
1991 
1992     if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1993         Py_INCREF(self);
1994         return (PyObject*)self;
1995     }
1996     else
1997         return PyBytes_FromStringAndSize(s+i, j-i);
1998 }
1999 
2000 
2001 Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject * self,int striptype,PyObject * bytes)2002 do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
2003 {
2004     if (bytes != NULL && bytes != Py_None) {
2005         return do_xstrip(self, striptype, bytes);
2006     }
2007     return do_strip(self, striptype);
2008 }
2009 
2010 /*[clinic input]
2011 bytes.strip
2012 
2013     bytes: object = None
2014     /
2015 
2016 Strip leading and trailing bytes contained in the argument.
2017 
2018 If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2019 [clinic start generated code]*/
2020 
2021 static PyObject *
bytes_strip_impl(PyBytesObject * self,PyObject * bytes)2022 bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
2023 /*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
2024 {
2025     return do_argstrip(self, BOTHSTRIP, bytes);
2026 }
2027 
2028 /*[clinic input]
2029 bytes.lstrip
2030 
2031     bytes: object = None
2032     /
2033 
2034 Strip leading bytes contained in the argument.
2035 
If the argument is omitted or None, strip leading ASCII whitespace.
2037 [clinic start generated code]*/
2038 
2039 static PyObject *
bytes_lstrip_impl(PyBytesObject * self,PyObject * bytes)2040 bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
2041 /*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
2042 {
2043     return do_argstrip(self, LEFTSTRIP, bytes);
2044 }
2045 
2046 /*[clinic input]
2047 bytes.rstrip
2048 
2049     bytes: object = None
2050     /
2051 
2052 Strip trailing bytes contained in the argument.
2053 
2054 If the argument is omitted or None, strip trailing ASCII whitespace.
2055 [clinic start generated code]*/
2056 
2057 static PyObject *
bytes_rstrip_impl(PyBytesObject * self,PyObject * bytes)2058 bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2059 /*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2060 {
2061     return do_argstrip(self, RIGHTSTRIP, bytes);
2062 }
2063 
2064 
2065 static PyObject *
bytes_count(PyBytesObject * self,PyObject * args)2066 bytes_count(PyBytesObject *self, PyObject *args)
2067 {
2068     return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2069 }
2070 
2071 
2072 /*[clinic input]
2073 bytes.translate
2074 
2075     table: object
2076         Translation table, which must be a bytes object of length 256.
2077     /
2078     delete as deletechars: object(c_default="NULL") = b''
2079 
2080 Return a copy with each character mapped by the given translation table.
2081 
2082 All characters occurring in the optional argument delete are removed.
2083 The remaining characters are mapped through the given translation table.
2084 [clinic start generated code]*/
2085 
2086 static PyObject *
bytes_translate_impl(PyBytesObject * self,PyObject * table,PyObject * deletechars)2087 bytes_translate_impl(PyBytesObject *self, PyObject *table,
2088                      PyObject *deletechars)
2089 /*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
2090 {
2091     char *input, *output;
2092     Py_buffer table_view = {NULL, NULL};
2093     Py_buffer del_table_view = {NULL, NULL};
2094     const char *table_chars;
2095     Py_ssize_t i, c, changed = 0;
2096     PyObject *input_obj = (PyObject*)self;
2097     const char *output_start, *del_table_chars=NULL;
2098     Py_ssize_t inlen, tablen, dellen = 0;
2099     PyObject *result;
2100     int trans_table[256];
2101 
2102     if (PyBytes_Check(table)) {
2103         table_chars = PyBytes_AS_STRING(table);
2104         tablen = PyBytes_GET_SIZE(table);
2105     }
2106     else if (table == Py_None) {
2107         table_chars = NULL;
2108         tablen = 256;
2109     }
2110     else {
2111         if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
2112             return NULL;
2113         table_chars = table_view.buf;
2114         tablen = table_view.len;
2115     }
2116 
2117     if (tablen != 256) {
2118         PyErr_SetString(PyExc_ValueError,
2119           "translation table must be 256 characters long");
2120         PyBuffer_Release(&table_view);
2121         return NULL;
2122     }
2123 
2124     if (deletechars != NULL) {
2125         if (PyBytes_Check(deletechars)) {
2126             del_table_chars = PyBytes_AS_STRING(deletechars);
2127             dellen = PyBytes_GET_SIZE(deletechars);
2128         }
2129         else {
2130             if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
2131                 PyBuffer_Release(&table_view);
2132                 return NULL;
2133             }
2134             del_table_chars = del_table_view.buf;
2135             dellen = del_table_view.len;
2136         }
2137     }
2138     else {
2139         del_table_chars = NULL;
2140         dellen = 0;
2141     }
2142 
2143     inlen = PyBytes_GET_SIZE(input_obj);
2144     result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2145     if (result == NULL) {
2146         PyBuffer_Release(&del_table_view);
2147         PyBuffer_Release(&table_view);
2148         return NULL;
2149     }
2150     output_start = output = PyBytes_AS_STRING(result);
2151     input = PyBytes_AS_STRING(input_obj);
2152 
2153     if (dellen == 0 && table_chars != NULL) {
2154         /* If no deletions are required, use faster code */
2155         for (i = inlen; --i >= 0; ) {
2156             c = Py_CHARMASK(*input++);
2157             if (Py_CHARMASK((*output++ = table_chars[c])) != c)
2158                 changed = 1;
2159         }
2160         if (!changed && PyBytes_CheckExact(input_obj)) {
2161             Py_INCREF(input_obj);
2162             Py_DECREF(result);
2163             result = input_obj;
2164         }
2165         PyBuffer_Release(&del_table_view);
2166         PyBuffer_Release(&table_view);
2167         return result;
2168     }
2169 
2170     if (table_chars == NULL) {
2171         for (i = 0; i < 256; i++)
2172             trans_table[i] = Py_CHARMASK(i);
2173     } else {
2174         for (i = 0; i < 256; i++)
2175             trans_table[i] = Py_CHARMASK(table_chars[i]);
2176     }
2177     PyBuffer_Release(&table_view);
2178 
2179     for (i = 0; i < dellen; i++)
2180         trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
2181     PyBuffer_Release(&del_table_view);
2182 
2183     for (i = inlen; --i >= 0; ) {
2184         c = Py_CHARMASK(*input++);
2185         if (trans_table[c] != -1)
2186             if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2187                 continue;
2188         changed = 1;
2189     }
2190     if (!changed && PyBytes_CheckExact(input_obj)) {
2191         Py_DECREF(result);
2192         Py_INCREF(input_obj);
2193         return input_obj;
2194     }
2195     /* Fix the size of the resulting string */
2196     if (inlen > 0)
2197         _PyBytes_Resize(&result, output - output_start);
2198     return result;
2199 }
2200 
2201 
2202 /*[clinic input]
2203 
2204 @staticmethod
2205 bytes.maketrans
2206 
2207     frm: Py_buffer
2208     to: Py_buffer
2209     /
2210 
2211 Return a translation table useable for the bytes or bytearray translate method.
2212 
2213 The returned table will be one where each byte in frm is mapped to the byte at
2214 the same position in to.
2215 
2216 The bytes objects frm and to must be of the same length.
2217 [clinic start generated code]*/
2218 
2219 static PyObject *
bytes_maketrans_impl(Py_buffer * frm,Py_buffer * to)2220 bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2221 /*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
2222 {
2223     return _Py_bytes_maketrans(frm, to);
2224 }
2225 
2226 
2227 /*[clinic input]
2228 bytes.replace
2229 
2230     old: Py_buffer
2231     new: Py_buffer
2232     count: Py_ssize_t = -1
2233         Maximum number of occurrences to replace.
2234         -1 (the default value) means replace all occurrences.
2235     /
2236 
2237 Return a copy with all occurrences of substring old replaced by new.
2238 
2239 If the optional argument count is given, only the first count occurrences are
2240 replaced.
2241 [clinic start generated code]*/
2242 
2243 static PyObject *
bytes_replace_impl(PyBytesObject * self,Py_buffer * old,Py_buffer * new,Py_ssize_t count)2244 bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2245                    Py_ssize_t count)
2246 /*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
2247 {
2248     return stringlib_replace((PyObject *)self,
2249                              (const char *)old->buf, old->len,
2250                              (const char *)new->buf, new->len, count);
2251 }
2252 
2253 /** End DALKE **/
2254 
2255 
2256 static PyObject *
bytes_startswith(PyBytesObject * self,PyObject * args)2257 bytes_startswith(PyBytesObject *self, PyObject *args)
2258 {
2259     return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2260 }
2261 
2262 static PyObject *
bytes_endswith(PyBytesObject * self,PyObject * args)2263 bytes_endswith(PyBytesObject *self, PyObject *args)
2264 {
2265     return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2266 }
2267 
2268 
2269 /*[clinic input]
2270 bytes.decode
2271 
2272     encoding: str(c_default="NULL") = 'utf-8'
2273         The encoding with which to decode the bytes.
2274     errors: str(c_default="NULL") = 'strict'
2275         The error handling scheme to use for the handling of decoding errors.
2276         The default is 'strict' meaning that decoding errors raise a
2277         UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2278         as well as any other name registered with codecs.register_error that
2279         can handle UnicodeDecodeErrors.
2280 
2281 Decode the bytes using the codec registered for encoding.
2282 [clinic start generated code]*/
2283 
2284 static PyObject *
bytes_decode_impl(PyBytesObject * self,const char * encoding,const char * errors)2285 bytes_decode_impl(PyBytesObject *self, const char *encoding,
2286                   const char *errors)
2287 /*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2288 {
2289     return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2290 }
2291 
2292 
2293 /*[clinic input]
2294 bytes.splitlines
2295 
2296     keepends: int(c_default="0") = False
2297 
2298 Return a list of the lines in the bytes, breaking at line boundaries.
2299 
2300 Line breaks are not included in the resulting list unless keepends is given and
2301 true.
2302 [clinic start generated code]*/
2303 
2304 static PyObject *
bytes_splitlines_impl(PyBytesObject * self,int keepends)2305 bytes_splitlines_impl(PyBytesObject *self, int keepends)
2306 /*[clinic end generated code: output=3484149a5d880ffb input=7f4aac67144f9944]*/
2307 {
2308     return stringlib_splitlines(
2309         (PyObject*) self, PyBytes_AS_STRING(self),
2310         PyBytes_GET_SIZE(self), keepends
2311         );
2312 }
2313 
2314 /*[clinic input]
2315 @classmethod
2316 bytes.fromhex
2317 
2318     string: unicode
2319     /
2320 
2321 Create a bytes object from a string of hexadecimal numbers.
2322 
2323 Spaces between two numbers are accepted.
2324 Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2325 [clinic start generated code]*/
2326 
2327 static PyObject *
bytes_fromhex_impl(PyTypeObject * type,PyObject * string)2328 bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2329 /*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
2330 {
2331     PyObject *result = _PyBytes_FromHex(string, 0);
2332     if (type != &PyBytes_Type && result != NULL) {
2333         Py_SETREF(result, PyObject_CallFunctionObjArgs((PyObject *)type,
2334                                                        result, NULL));
2335     }
2336     return result;
2337 }
2338 
2339 PyObject*
_PyBytes_FromHex(PyObject * string,int use_bytearray)2340 _PyBytes_FromHex(PyObject *string, int use_bytearray)
2341 {
2342     char *buf;
2343     Py_ssize_t hexlen, invalid_char;
2344     unsigned int top, bot;
2345     Py_UCS1 *str, *end;
2346     _PyBytesWriter writer;
2347 
2348     _PyBytesWriter_Init(&writer);
2349     writer.use_bytearray = use_bytearray;
2350 
2351     assert(PyUnicode_Check(string));
2352     if (PyUnicode_READY(string))
2353         return NULL;
2354     hexlen = PyUnicode_GET_LENGTH(string);
2355 
2356     if (!PyUnicode_IS_ASCII(string)) {
2357         void *data = PyUnicode_DATA(string);
2358         unsigned int kind = PyUnicode_KIND(string);
2359         Py_ssize_t i;
2360 
2361         /* search for the first non-ASCII character */
2362         for (i = 0; i < hexlen; i++) {
2363             if (PyUnicode_READ(kind, data, i) >= 128)
2364                 break;
2365         }
2366         invalid_char = i;
2367         goto error;
2368     }
2369 
2370     assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2371     str = PyUnicode_1BYTE_DATA(string);
2372 
2373     /* This overestimates if there are spaces */
2374     buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2375     if (buf == NULL)
2376         return NULL;
2377 
2378     end = str + hexlen;
2379     while (str < end) {
2380         /* skip over spaces in the input */
2381         if (*str == ' ') {
2382             do {
2383                 str++;
2384             } while (*str == ' ');
2385             if (str >= end)
2386                 break;
2387         }
2388 
2389         top = _PyLong_DigitValue[*str];
2390         if (top >= 16) {
2391             invalid_char = str - PyUnicode_1BYTE_DATA(string);
2392             goto error;
2393         }
2394         str++;
2395 
2396         bot = _PyLong_DigitValue[*str];
2397         if (bot >= 16) {
2398             invalid_char = str - PyUnicode_1BYTE_DATA(string);
2399             goto error;
2400         }
2401         str++;
2402 
2403         *buf++ = (unsigned char)((top << 4) + bot);
2404     }
2405 
2406     return _PyBytesWriter_Finish(&writer, buf);
2407 
2408   error:
2409     PyErr_Format(PyExc_ValueError,
2410                  "non-hexadecimal number found in "
2411                  "fromhex() arg at position %zd", invalid_char);
2412     _PyBytesWriter_Dealloc(&writer);
2413     return NULL;
2414 }
2415 
2416 PyDoc_STRVAR(hex__doc__,
2417 "B.hex() -> string\n\
2418 \n\
2419 Create a string of hexadecimal numbers from a bytes object.\n\
2420 Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
2421 
2422 static PyObject *
bytes_hex(PyBytesObject * self)2423 bytes_hex(PyBytesObject *self)
2424 {
2425     char* argbuf = PyBytes_AS_STRING(self);
2426     Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2427     return _Py_strhex(argbuf, arglen);
2428 }
2429 
2430 static PyObject *
bytes_getnewargs(PyBytesObject * v)2431 bytes_getnewargs(PyBytesObject *v)
2432 {
2433     return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
2434 }
2435 
2436 
/* Method table for the bytes type, in alphabetical order of method name.
   Entries written as BYTES_*_METHODDEF are macros (presumably emitted by
   Argument Clinic into clinic/bytesobject.c.h -- confirm); the explicit
   entries bind hand-written wrappers from this file or shared stringlib
   functions.  The table ends with a NULL sentinel. */
static PyMethodDef
bytes_methods[] = {
    {"__getnewargs__",          (PyCFunction)bytes_getnewargs,  METH_NOARGS},
    {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    {"center", (PyCFunction)stringlib_center, METH_VARARGS,
     _Py_center__doc__},
    {"count", (PyCFunction)bytes_count, METH_VARARGS,
     _Py_count__doc__},
    BYTES_DECODE_METHODDEF
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
     _Py_endswith__doc__},
    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
     _Py_expandtabs__doc__},
    {"find", (PyCFunction)bytes_find, METH_VARARGS,
     _Py_find__doc__},
    BYTES_FROMHEX_METHODDEF
    {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
    {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
    {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    BYTES_JOIN_METHODDEF
    {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, _Py_ljust__doc__},
    {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    BYTES_LSTRIP_METHODDEF
    BYTES_MAKETRANS_METHODDEF
    BYTES_PARTITION_METHODDEF
    BYTES_REPLACE_METHODDEF
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
    {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, _Py_rjust__doc__},
    BYTES_RPARTITION_METHODDEF
    BYTES_RSPLIT_METHODDEF
    BYTES_RSTRIP_METHODDEF
    BYTES_SPLIT_METHODDEF
    BYTES_SPLITLINES_METHODDEF
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
     _Py_startswith__doc__},
    BYTES_STRIP_METHODDEF
    {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
    BYTES_TRANSLATE_METHODDEF
    {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, _Py_zfill__doc__},
    {NULL,     NULL}                         /* sentinel */
};
2496 
2497 static PyObject *
bytes_mod(PyObject * self,PyObject * arg)2498 bytes_mod(PyObject *self, PyObject *arg)
2499 {
2500     if (!PyBytes_Check(self)) {
2501         Py_RETURN_NOTIMPLEMENTED;
2502     }
2503     return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2504                              arg, 0);
2505 }
2506 
/* Number protocol slots for bytes.  Only nb_remainder (the % operator, see
   bytes_mod) is provided; the slots listed before it are explicitly 0 and
   every slot after it is implicitly zero-initialised. */
static PyNumberMethods bytes_as_number = {
    0,              /*nb_add*/
    0,              /*nb_subtract*/
    0,              /*nb_multiply*/
    bytes_mod,      /*nb_remainder*/
};
2513 
2514 static PyObject *
2515 bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2516 
2517 static PyObject *
bytes_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2518 bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2519 {
2520     PyObject *x = NULL;
2521     const char *encoding = NULL;
2522     const char *errors = NULL;
2523     PyObject *new = NULL;
2524     PyObject *func;
2525     Py_ssize_t size;
2526     static char *kwlist[] = {"source", "encoding", "errors", 0};
2527     _Py_IDENTIFIER(__bytes__);
2528 
2529     if (type != &PyBytes_Type)
2530         return bytes_subtype_new(type, args, kwds);
2531     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2532                                      &encoding, &errors))
2533         return NULL;
2534     if (x == NULL) {
2535         if (encoding != NULL || errors != NULL) {
2536             PyErr_SetString(PyExc_TypeError,
2537                             "encoding or errors without sequence "
2538                             "argument");
2539             return NULL;
2540         }
2541         return PyBytes_FromStringAndSize(NULL, 0);
2542     }
2543 
2544     if (encoding != NULL) {
2545         /* Encode via the codec registry */
2546         if (!PyUnicode_Check(x)) {
2547             PyErr_SetString(PyExc_TypeError,
2548                             "encoding without a string argument");
2549             return NULL;
2550         }
2551         new = PyUnicode_AsEncodedString(x, encoding, errors);
2552         if (new == NULL)
2553             return NULL;
2554         assert(PyBytes_Check(new));
2555         return new;
2556     }
2557 
2558     if (errors != NULL) {
2559         PyErr_SetString(PyExc_TypeError,
2560                         PyUnicode_Check(x) ?
2561                         "string argument without an encoding" :
2562                         "errors without a string argument");
2563         return NULL;
2564     }
2565 
2566     /* We'd like to call PyObject_Bytes here, but we need to check for an
2567        integer argument before deferring to PyBytes_FromObject, something
2568        PyObject_Bytes doesn't do. */
2569     func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2570     if (func != NULL) {
2571         new = PyObject_CallFunctionObjArgs(func, NULL);
2572         Py_DECREF(func);
2573         if (new == NULL)
2574             return NULL;
2575         if (!PyBytes_Check(new)) {
2576             PyErr_Format(PyExc_TypeError,
2577                          "__bytes__ returned non-bytes (type %.200s)",
2578                          Py_TYPE(new)->tp_name);
2579             Py_DECREF(new);
2580             return NULL;
2581         }
2582         return new;
2583     }
2584     else if (PyErr_Occurred())
2585         return NULL;
2586 
2587     if (PyUnicode_Check(x)) {
2588         PyErr_SetString(PyExc_TypeError,
2589                         "string argument without an encoding");
2590         return NULL;
2591     }
2592     /* Is it an integer? */
2593     if (PyIndex_Check(x)) {
2594         size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2595         if (size == -1 && PyErr_Occurred()) {
2596             if (PyErr_ExceptionMatches(PyExc_OverflowError))
2597                 return NULL;
2598             PyErr_Clear();  /* fall through */
2599         }
2600         else {
2601             if (size < 0) {
2602                 PyErr_SetString(PyExc_ValueError, "negative count");
2603                 return NULL;
2604             }
2605             new = _PyBytes_FromSize(size, 1);
2606             if (new == NULL)
2607                 return NULL;
2608             return new;
2609         }
2610     }
2611 
2612     return PyBytes_FromObject(x);
2613 }
2614 
2615 static PyObject*
_PyBytes_FromBuffer(PyObject * x)2616 _PyBytes_FromBuffer(PyObject *x)
2617 {
2618     PyObject *new;
2619     Py_buffer view;
2620 
2621     if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2622         return NULL;
2623 
2624     new = PyBytes_FromStringAndSize(NULL, view.len);
2625     if (!new)
2626         goto fail;
2627     if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2628                 &view, view.len, 'C') < 0)
2629         goto fail;
2630     PyBuffer_Release(&view);
2631     return new;
2632 
2633 fail:
2634     Py_XDECREF(new);
2635     PyBuffer_Release(&view);
2636     return NULL;
2637 }
2638 
/* Expands to a complete function body that converts the sequence x into a
   new bytes object.  GET_ITEM(x, i) must yield the i-th item; each item is
   converted with PyNumber_AsSsize_t() and must lie in range(0, 256),
   otherwise ValueError is set.  The expansion returns from the enclosing
   function: the new bytes object on success, NULL on error.

   NOTE(review): the output buffer is sized once from Py_SIZE(x), yet the
   loop bound re-reads Py_SIZE(x) on every iteration, and no reference is
   taken on item while PyNumber_AsSsize_t() runs (GET_ITEM is presumably a
   borrowing accessor such as PyList_GET_ITEM -- confirm).  This is only
   safe if x cannot be modified while an item is being converted. */
#define _PyBytes_FROM_LIST_BODY(x, GET_ITEM)                                \
    do {                                                                    \
        PyObject *bytes;                                                    \
        Py_ssize_t i;                                                       \
        Py_ssize_t value;                                                   \
        char *str;                                                          \
        PyObject *item;                                                     \
                                                                            \
        bytes = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));                \
        if (bytes == NULL)                                                  \
            return NULL;                                                    \
        str = ((PyBytesObject *)bytes)->ob_sval;                            \
                                                                            \
        for (i = 0; i < Py_SIZE(x); i++) {                                  \
            item = GET_ITEM((x), i);                                        \
            value = PyNumber_AsSsize_t(item, NULL);                         \
            if (value == -1 && PyErr_Occurred())                            \
                goto error;                                                 \
                                                                            \
            if (value < 0 || value >= 256) {                                \
                PyErr_SetString(PyExc_ValueError,                           \
                                "bytes must be in range(0, 256)");          \
                goto error;                                                 \
            }                                                               \
            *str++ = (char) value;                                          \
        }                                                                   \
        return bytes;                                                       \
                                                                            \
    error:                                                                  \
        Py_DECREF(bytes);                                                   \
        return NULL;                                                        \
    } while (0)
2671 
2672 static PyObject*
_PyBytes_FromList(PyObject * x)2673 _PyBytes_FromList(PyObject *x)
2674 {
2675     _PyBytes_FROM_LIST_BODY(x, PyList_GET_ITEM);
2676 }
2677 
2678 static PyObject*
_PyBytes_FromTuple(PyObject * x)2679 _PyBytes_FromTuple(PyObject *x)
2680 {
2681     _PyBytes_FROM_LIST_BODY(x, PyTuple_GET_ITEM);
2682 }
2683 
2684 static PyObject *
_PyBytes_FromIterator(PyObject * it,PyObject * x)2685 _PyBytes_FromIterator(PyObject *it, PyObject *x)
2686 {
2687     char *str;
2688     Py_ssize_t i, size;
2689     _PyBytesWriter writer;
2690 
2691     /* For iterator version, create a string object and resize as needed */
2692     size = PyObject_LengthHint(x, 64);
2693     if (size == -1 && PyErr_Occurred())
2694         return NULL;
2695 
2696     _PyBytesWriter_Init(&writer);
2697     str = _PyBytesWriter_Alloc(&writer, size);
2698     if (str == NULL)
2699         return NULL;
2700     writer.overallocate = 1;
2701     size = writer.allocated;
2702 
2703     /* Run the iterator to exhaustion */
2704     for (i = 0; ; i++) {
2705         PyObject *item;
2706         Py_ssize_t value;
2707 
2708         /* Get the next item */
2709         item = PyIter_Next(it);
2710         if (item == NULL) {
2711             if (PyErr_Occurred())
2712                 goto error;
2713             break;
2714         }
2715 
2716         /* Interpret it as an int (__index__) */
2717         value = PyNumber_AsSsize_t(item, NULL);
2718         Py_DECREF(item);
2719         if (value == -1 && PyErr_Occurred())
2720             goto error;
2721 
2722         /* Range check */
2723         if (value < 0 || value >= 256) {
2724             PyErr_SetString(PyExc_ValueError,
2725                             "bytes must be in range(0, 256)");
2726             goto error;
2727         }
2728 
2729         /* Append the byte */
2730         if (i >= size) {
2731             str = _PyBytesWriter_Resize(&writer, str, size+1);
2732             if (str == NULL)
2733                 return NULL;
2734             size = writer.allocated;
2735         }
2736         *str++ = (char) value;
2737     }
2738 
2739     return _PyBytesWriter_Finish(&writer, str);
2740 
2741   error:
2742     _PyBytesWriter_Dealloc(&writer);
2743     return NULL;
2744 }
2745 
2746 PyObject *
PyBytes_FromObject(PyObject * x)2747 PyBytes_FromObject(PyObject *x)
2748 {
2749     PyObject *it, *result;
2750 
2751     if (x == NULL) {
2752         PyErr_BadInternalCall();
2753         return NULL;
2754     }
2755 
2756     if (PyBytes_CheckExact(x)) {
2757         Py_INCREF(x);
2758         return x;
2759     }
2760 
2761     /* Use the modern buffer interface */
2762     if (PyObject_CheckBuffer(x))
2763         return _PyBytes_FromBuffer(x);
2764 
2765     if (PyList_CheckExact(x))
2766         return _PyBytes_FromList(x);
2767 
2768     if (PyTuple_CheckExact(x))
2769         return _PyBytes_FromTuple(x);
2770 
2771     if (!PyUnicode_Check(x)) {
2772         it = PyObject_GetIter(x);
2773         if (it != NULL) {
2774             result = _PyBytes_FromIterator(it, x);
2775             Py_DECREF(it);
2776             return result;
2777         }
2778     }
2779 
2780     PyErr_Format(PyExc_TypeError,
2781                  "cannot convert '%.200s' object to bytes",
2782                  x->ob_type->tp_name);
2783     return NULL;
2784 }
2785 
2786 static PyObject *
bytes_subtype_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2787 bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2788 {
2789     PyObject *tmp, *pnew;
2790     Py_ssize_t n;
2791 
2792     assert(PyType_IsSubtype(type, &PyBytes_Type));
2793     tmp = bytes_new(&PyBytes_Type, args, kwds);
2794     if (tmp == NULL)
2795         return NULL;
2796     assert(PyBytes_Check(tmp));
2797     n = PyBytes_GET_SIZE(tmp);
2798     pnew = type->tp_alloc(type, n);
2799     if (pnew != NULL) {
2800         memcpy(PyBytes_AS_STRING(pnew),
2801                   PyBytes_AS_STRING(tmp), n+1);
2802         ((PyBytesObject *)pnew)->ob_shash =
2803             ((PyBytesObject *)tmp)->ob_shash;
2804     }
2805     Py_DECREF(tmp);
2806     return pnew;
2807 }
2808 
2809 PyDoc_STRVAR(bytes_doc,
2810 "bytes(iterable_of_ints) -> bytes\n\
2811 bytes(string, encoding[, errors]) -> bytes\n\
2812 bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
2813 bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2814 bytes() -> empty bytes object\n\
2815 \n\
2816 Construct an immutable array of bytes from:\n\
2817   - an iterable yielding integers in range(256)\n\
2818   - a text string encoded using the specified encoding\n\
2819   - any object implementing the buffer API.\n\
2820   - an integer");
2821 
2822 static PyObject *bytes_iter(PyObject *seq);
2823 
2824 PyTypeObject PyBytes_Type = {
2825     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2826     "bytes",
2827     PyBytesObject_SIZE,
2828     sizeof(char),
2829     bytes_dealloc,                      /* tp_dealloc */
2830     0,                                          /* tp_print */
2831     0,                                          /* tp_getattr */
2832     0,                                          /* tp_setattr */
2833     0,                                          /* tp_reserved */
2834     (reprfunc)bytes_repr,                       /* tp_repr */
2835     &bytes_as_number,                           /* tp_as_number */
2836     &bytes_as_sequence,                         /* tp_as_sequence */
2837     &bytes_as_mapping,                          /* tp_as_mapping */
2838     (hashfunc)bytes_hash,                       /* tp_hash */
2839     0,                                          /* tp_call */
2840     bytes_str,                                  /* tp_str */
2841     PyObject_GenericGetAttr,                    /* tp_getattro */
2842     0,                                          /* tp_setattro */
2843     &bytes_as_buffer,                           /* tp_as_buffer */
2844     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2845         Py_TPFLAGS_BYTES_SUBCLASS,              /* tp_flags */
2846     bytes_doc,                                  /* tp_doc */
2847     0,                                          /* tp_traverse */
2848     0,                                          /* tp_clear */
2849     (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
2850     0,                                          /* tp_weaklistoffset */
2851     bytes_iter,                                 /* tp_iter */
2852     0,                                          /* tp_iternext */
2853     bytes_methods,                              /* tp_methods */
2854     0,                                          /* tp_members */
2855     0,                                          /* tp_getset */
2856     &PyBaseObject_Type,                         /* tp_base */
2857     0,                                          /* tp_dict */
2858     0,                                          /* tp_descr_get */
2859     0,                                          /* tp_descr_set */
2860     0,                                          /* tp_dictoffset */
2861     0,                                          /* tp_init */
2862     0,                                          /* tp_alloc */
2863     bytes_new,                                  /* tp_new */
2864     PyObject_Del,                               /* tp_free */
2865 };
2866 
2867 void
PyBytes_Concat(PyObject ** pv,PyObject * w)2868 PyBytes_Concat(PyObject **pv, PyObject *w)
2869 {
2870     assert(pv != NULL);
2871     if (*pv == NULL)
2872         return;
2873     if (w == NULL) {
2874         Py_CLEAR(*pv);
2875         return;
2876     }
2877 
2878     if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2879         /* Only one reference, so we can resize in place */
2880         Py_ssize_t oldsize;
2881         Py_buffer wb;
2882 
2883         wb.len = -1;
2884         if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
2885             PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2886                          Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2887             Py_CLEAR(*pv);
2888             return;
2889         }
2890 
2891         oldsize = PyBytes_GET_SIZE(*pv);
2892         if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2893             PyErr_NoMemory();
2894             goto error;
2895         }
2896         if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2897             goto error;
2898 
2899         memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2900         PyBuffer_Release(&wb);
2901         return;
2902 
2903       error:
2904         PyBuffer_Release(&wb);
2905         Py_CLEAR(*pv);
2906         return;
2907     }
2908 
2909     else {
2910         /* Multiple references, need to create new object */
2911         PyObject *v;
2912         v = bytes_concat(*pv, w);
2913         Py_SETREF(*pv, v);
2914     }
2915 }
2916 
/* Same as PyBytes_Concat(), but additionally releases the caller's
   reference to `w` afterwards.  `w` may be NULL (Py_XDECREF is used). */
void
PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
{
    PyBytes_Concat(pv, w);
    Py_XDECREF(w);
}
2923 
2924 
2925 /* The following function breaks the notion that bytes are immutable:
2926    it changes the size of a bytes object.  We get away with this only if there
2927    is only one module referencing the object.  You can also think of it
2928    as creating a new bytes object and destroying the old one, only
2929    more efficiently.  In any case, don't use this if the bytes object may
2930    already be known to some other part of the code...
2931    Note that if there's not enough memory to resize the bytes object, the
2932    original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
2933    memory" exception is set, and -1 is returned.  Else (on success) 0 is
2934    returned, and the value in *pv may or may not be the same as on input.
2935    As always, an extra byte is allocated for a trailing \0 byte (newsize
2936    does *not* include that), and a trailing \0 byte is stored.
2937 */
2938 
2939 int
_PyBytes_Resize(PyObject ** pv,Py_ssize_t newsize)2940 _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2941 {
2942     PyObject *v;
2943     PyBytesObject *sv;
2944     v = *pv;
2945     if (!PyBytes_Check(v) || newsize < 0) {
2946         goto error;
2947     }
2948     if (Py_SIZE(v) == newsize) {
2949         /* return early if newsize equals to v->ob_size */
2950         return 0;
2951     }
2952     if (Py_REFCNT(v) != 1) {
2953         goto error;
2954     }
2955     /* XXX UNREF/NEWREF interface should be more symmetrical */
2956     _Py_DEC_REFTOTAL;
2957     _Py_ForgetReference(v);
2958     *pv = (PyObject *)
2959         PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
2960     if (*pv == NULL) {
2961         PyObject_Del(v);
2962         PyErr_NoMemory();
2963         return -1;
2964     }
2965     _Py_NewReference(*pv);
2966     sv = (PyBytesObject *) *pv;
2967     Py_SIZE(sv) = newsize;
2968     sv->ob_sval[newsize] = '\0';
2969     sv->ob_shash = -1;          /* invalidate cached hash value */
2970     return 0;
2971 error:
2972     *pv = 0;
2973     Py_DECREF(v);
2974     PyErr_BadInternalCall();
2975     return -1;
2976 }
2977 
2978 void
PyBytes_Fini(void)2979 PyBytes_Fini(void)
2980 {
2981     int i;
2982     for (i = 0; i < UCHAR_MAX + 1; i++)
2983         Py_CLEAR(characters[i]);
2984     Py_CLEAR(nullstring);
2985 }
2986 
2987 /*********************** Bytes Iterator ****************************/
2988 
/* State of an iterator over a bytes object. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;   /* Index of the next byte to yield */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
2994 
/* Destructor for bytes iterators: untrack the object from the GC, drop the
   reference to the underlying bytes object (it_seq is NULL once the
   iterator is exhausted, hence Py_XDECREF), then free the iterator. */
static void
striter_dealloc(striterobject *it)
{
    _PyObject_GC_UNTRACK(it);
    Py_XDECREF(it->it_seq);
    PyObject_GC_Del(it);
}
3002 
/* GC traversal: the only object reference held is it_seq.
   Note: Py_VISIT refers to the parameters by the names `visit` and `arg`,
   so they must not be renamed. */
static int
striter_traverse(striterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
3009 
3010 static PyObject *
striter_next(striterobject * it)3011 striter_next(striterobject *it)
3012 {
3013     PyBytesObject *seq;
3014     PyObject *item;
3015 
3016     assert(it != NULL);
3017     seq = it->it_seq;
3018     if (seq == NULL)
3019         return NULL;
3020     assert(PyBytes_Check(seq));
3021 
3022     if (it->it_index < PyBytes_GET_SIZE(seq)) {
3023         item = PyLong_FromLong(
3024             (unsigned char)seq->ob_sval[it->it_index]);
3025         if (item != NULL)
3026             ++it->it_index;
3027         return item;
3028     }
3029 
3030     it->it_seq = NULL;
3031     Py_DECREF(seq);
3032     return NULL;
3033 }
3034 
3035 static PyObject *
striter_len(striterobject * it)3036 striter_len(striterobject *it)
3037 {
3038     Py_ssize_t len = 0;
3039     if (it->it_seq)
3040         len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3041     return PyLong_FromSsize_t(len);
3042 }
3043 
3044 PyDoc_STRVAR(length_hint_doc,
3045              "Private method returning an estimate of len(list(it)).");
3046 
3047 static PyObject *
striter_reduce(striterobject * it)3048 striter_reduce(striterobject *it)
3049 {
3050     if (it->it_seq != NULL) {
3051         return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
3052                              it->it_seq, it->it_index);
3053     } else {
3054         PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3055         if (u == NULL)
3056             return NULL;
3057         return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
3058     }
3059 }
3060 
3061 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3062 
3063 static PyObject *
striter_setstate(striterobject * it,PyObject * state)3064 striter_setstate(striterobject *it, PyObject *state)
3065 {
3066     Py_ssize_t index = PyLong_AsSsize_t(state);
3067     if (index == -1 && PyErr_Occurred())
3068         return NULL;
3069     if (it->it_seq != NULL) {
3070         if (index < 0)
3071             index = 0;
3072         else if (index > PyBytes_GET_SIZE(it->it_seq))
3073             index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3074         it->it_index = index;
3075     }
3076     Py_RETURN_NONE;
3077 }
3078 
3079 PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3080 
/* Methods of bytes iterator objects (installed via PyBytesIter_Type). */
static PyMethodDef striter_methods[] = {
    /* estimate of the number of remaining items */
    {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
     length_hint_doc},
    /* pickling support */
    {"__reduce__",      (PyCFunction)striter_reduce, METH_NOARGS,
     reduce_doc},
    /* unpickling support: takes the saved index as its single argument */
    {"__setstate__",    (PyCFunction)striter_setstate, METH_O,
     setstate_doc},
    {NULL,              NULL}           /* sentinel */
};
3090 
3091 PyTypeObject PyBytesIter_Type = {
3092     PyVarObject_HEAD_INIT(&PyType_Type, 0)
3093     "bytes_iterator",                           /* tp_name */
3094     sizeof(striterobject),                      /* tp_basicsize */
3095     0,                                          /* tp_itemsize */
3096     /* methods */
3097     (destructor)striter_dealloc,                /* tp_dealloc */
3098     0,                                          /* tp_print */
3099     0,                                          /* tp_getattr */
3100     0,                                          /* tp_setattr */
3101     0,                                          /* tp_reserved */
3102     0,                                          /* tp_repr */
3103     0,                                          /* tp_as_number */
3104     0,                                          /* tp_as_sequence */
3105     0,                                          /* tp_as_mapping */
3106     0,                                          /* tp_hash */
3107     0,                                          /* tp_call */
3108     0,                                          /* tp_str */
3109     PyObject_GenericGetAttr,                    /* tp_getattro */
3110     0,                                          /* tp_setattro */
3111     0,                                          /* tp_as_buffer */
3112     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3113     0,                                          /* tp_doc */
3114     (traverseproc)striter_traverse,     /* tp_traverse */
3115     0,                                          /* tp_clear */
3116     0,                                          /* tp_richcompare */
3117     0,                                          /* tp_weaklistoffset */
3118     PyObject_SelfIter,                          /* tp_iter */
3119     (iternextfunc)striter_next,                 /* tp_iternext */
3120     striter_methods,                            /* tp_methods */
3121     0,
3122 };
3123 
3124 static PyObject *
bytes_iter(PyObject * seq)3125 bytes_iter(PyObject *seq)
3126 {
3127     striterobject *it;
3128 
3129     if (!PyBytes_Check(seq)) {
3130         PyErr_BadInternalCall();
3131         return NULL;
3132     }
3133     it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3134     if (it == NULL)
3135         return NULL;
3136     it->it_index = 0;
3137     Py_INCREF(seq);
3138     it->it_seq = (PyBytesObject *)seq;
3139     _PyObject_GC_TRACK(it);
3140     return (PyObject *)it;
3141 }
3142 
3143 
3144 /* _PyBytesWriter API */
3145 
3146 #ifdef MS_WINDOWS
   /* On Windows, overallocating by 50% is the best factor */
3148 #  define OVERALLOCATE_FACTOR 2
3149 #else
   /* On Linux, overallocating by 25% is the best factor */
3151 #  define OVERALLOCATE_FACTOR 4
3152 #endif
3153 
/* Initialize a writer before first use.

   Every field located before `small_buffer` is zeroed.  The small buffer
   itself is left untouched in release builds; in debug builds it is filled
   with the byte 0xCB. */
void
_PyBytesWriter_Init(_PyBytesWriter *writer)
{
    /* Set all attributes before small_buffer to 0 */
    memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
#ifdef Py_DEBUG
    memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
#endif
}
3163 
/* Release the writer's buffer object: drops the reference held in
   writer->buffer (if any) and resets the field to NULL. */
void
_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
{
    Py_CLEAR(writer->buffer);
}
3169 
3170 Py_LOCAL_INLINE(char*)
_PyBytesWriter_AsString(_PyBytesWriter * writer)3171 _PyBytesWriter_AsString(_PyBytesWriter *writer)
3172 {
3173     if (writer->use_small_buffer) {
3174         assert(writer->buffer == NULL);
3175         return writer->small_buffer;
3176     }
3177     else if (writer->use_bytearray) {
3178         assert(writer->buffer != NULL);
3179         return PyByteArray_AS_STRING(writer->buffer);
3180     }
3181     else {
3182         assert(writer->buffer != NULL);
3183         return PyBytes_AS_STRING(writer->buffer);
3184     }
3185 }
3186 
3187 Py_LOCAL_INLINE(Py_ssize_t)
_PyBytesWriter_GetSize(_PyBytesWriter * writer,char * str)3188 _PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
3189 {
3190     char *start = _PyBytesWriter_AsString(writer);
3191     assert(str != NULL);
3192     assert(str >= start);
3193     assert(str - start <= writer->allocated);
3194     return str - start;
3195 }
3196 
3197 Py_LOCAL_INLINE(void)
_PyBytesWriter_CheckConsistency(_PyBytesWriter * writer,char * str)3198 _PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3199 {
3200 #ifdef Py_DEBUG
3201     char *start, *end;
3202 
3203     if (writer->use_small_buffer) {
3204         assert(writer->buffer == NULL);
3205     }
3206     else {
3207         assert(writer->buffer != NULL);
3208         if (writer->use_bytearray)
3209             assert(PyByteArray_CheckExact(writer->buffer));
3210         else
3211             assert(PyBytes_CheckExact(writer->buffer));
3212         assert(Py_REFCNT(writer->buffer) == 1);
3213     }
3214 
3215     if (writer->use_bytearray) {
3216         /* bytearray has its own overallocation algorithm,
3217            writer overallocation must be disabled */
3218         assert(!writer->overallocate);
3219     }
3220 
3221     assert(0 <= writer->allocated);
3222     assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
3223     /* the last byte must always be null */
3224     start = _PyBytesWriter_AsString(writer);
3225     assert(start[writer->allocated] == 0);
3226 
3227     end = start + writer->allocated;
3228     assert(str != NULL);
3229     assert(start <= str && str <= end);
3230 #endif
3231 }
3232 
3233 void*
_PyBytesWriter_Resize(_PyBytesWriter * writer,void * str,Py_ssize_t size)3234 _PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3235 {
3236     Py_ssize_t allocated, pos;
3237 
3238     _PyBytesWriter_CheckConsistency(writer, str);
3239     assert(writer->allocated < size);
3240 
3241     allocated = size;
3242     if (writer->overallocate
3243         && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3244         /* overallocate to limit the number of realloc() */
3245         allocated += allocated / OVERALLOCATE_FACTOR;
3246     }
3247 
3248     pos = _PyBytesWriter_GetSize(writer, str);
3249     if (!writer->use_small_buffer) {
3250         if (writer->use_bytearray) {
3251             if (PyByteArray_Resize(writer->buffer, allocated))
3252                 goto error;
3253             /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3254                but we cannot use ob_alloc because bytes may need to be moved
3255                to use the whole buffer. bytearray uses an internal optimization
3256                to avoid moving or copying bytes when bytes are removed at the
3257                beginning (ex: del bytearray[:1]). */
3258         }
3259         else {
3260             if (_PyBytes_Resize(&writer->buffer, allocated))
3261                 goto error;
3262         }
3263     }
3264     else {
3265         /* convert from stack buffer to bytes object buffer */
3266         assert(writer->buffer == NULL);
3267 
3268         if (writer->use_bytearray)
3269             writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3270         else
3271             writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3272         if (writer->buffer == NULL)
3273             goto error;
3274 
3275         if (pos != 0) {
3276             char *dest;
3277             if (writer->use_bytearray)
3278                 dest = PyByteArray_AS_STRING(writer->buffer);
3279             else
3280                 dest = PyBytes_AS_STRING(writer->buffer);
3281             memcpy(dest,
3282                       writer->small_buffer,
3283                       pos);
3284         }
3285 
3286         writer->use_small_buffer = 0;
3287 #ifdef Py_DEBUG
3288         memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
3289 #endif
3290     }
3291     writer->allocated = allocated;
3292 
3293     str = _PyBytesWriter_AsString(writer) + pos;
3294     _PyBytesWriter_CheckConsistency(writer, str);
3295     return str;
3296 
3297 error:
3298     _PyBytesWriter_Dealloc(writer);
3299     return NULL;
3300 }
3301 
3302 void*
_PyBytesWriter_Prepare(_PyBytesWriter * writer,void * str,Py_ssize_t size)3303 _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3304 {
3305     Py_ssize_t new_min_size;
3306 
3307     _PyBytesWriter_CheckConsistency(writer, str);
3308     assert(size >= 0);
3309 
3310     if (size == 0) {
3311         /* nothing to do */
3312         return str;
3313     }
3314 
3315     if (writer->min_size > PY_SSIZE_T_MAX - size) {
3316         PyErr_NoMemory();
3317         _PyBytesWriter_Dealloc(writer);
3318         return NULL;
3319     }
3320     new_min_size = writer->min_size + size;
3321 
3322     if (new_min_size > writer->allocated)
3323         str = _PyBytesWriter_Resize(writer, str, new_min_size);
3324 
3325     writer->min_size = new_min_size;
3326     return str;
3327 }
3328 
3329 /* Allocate the buffer to write size bytes.
3330    Return the pointer to the beginning of buffer data.
3331    Raise an exception and return NULL on error. */
3332 void*
_PyBytesWriter_Alloc(_PyBytesWriter * writer,Py_ssize_t size)3333 _PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3334 {
3335     /* ensure that _PyBytesWriter_Alloc() is only called once */
3336     assert(writer->min_size == 0 && writer->buffer == NULL);
3337     assert(size >= 0);
3338 
3339     writer->use_small_buffer = 1;
3340 #ifdef Py_DEBUG
3341     writer->allocated = sizeof(writer->small_buffer) - 1;
3342     /* In debug mode, don't use the full small buffer because it is less
3343        efficient than bytes and bytearray objects to detect buffer underflow
3344        and buffer overflow. Use 10 bytes of the small buffer to test also
3345        code using the smaller buffer in debug mode.
3346 
3347        Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3348        in debug mode to also be able to detect stack overflow when running
3349        tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3350        if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3351        stack overflow. */
3352     writer->allocated = Py_MIN(writer->allocated, 10);
3353     /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3354        to detect buffer overflow */
3355     writer->small_buffer[writer->allocated] = 0;
3356 #else
3357     writer->allocated = sizeof(writer->small_buffer);
3358 #endif
3359     return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
3360 }
3361 
3362 PyObject *
_PyBytesWriter_Finish(_PyBytesWriter * writer,void * str)3363 _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
3364 {
3365     Py_ssize_t size;
3366     PyObject *result;
3367 
3368     _PyBytesWriter_CheckConsistency(writer, str);
3369 
3370     size = _PyBytesWriter_GetSize(writer, str);
3371     if (size == 0 && !writer->use_bytearray) {
3372         Py_CLEAR(writer->buffer);
3373         /* Get the empty byte string singleton */
3374         result = PyBytes_FromStringAndSize(NULL, 0);
3375     }
3376     else if (writer->use_small_buffer) {
3377         if (writer->use_bytearray) {
3378             result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3379         }
3380         else {
3381             result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3382         }
3383     }
3384     else {
3385         result = writer->buffer;
3386         writer->buffer = NULL;
3387 
3388         if (size != writer->allocated) {
3389             if (writer->use_bytearray) {
3390                 if (PyByteArray_Resize(result, size)) {
3391                     Py_DECREF(result);
3392                     return NULL;
3393                 }
3394             }
3395             else {
3396                 if (_PyBytes_Resize(&result, size)) {
3397                     assert(result == NULL);
3398                     return NULL;
3399                 }
3400             }
3401         }
3402     }
3403     return result;
3404 }
3405 
3406 void*
_PyBytesWriter_WriteBytes(_PyBytesWriter * writer,void * ptr,const void * bytes,Py_ssize_t size)3407 _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
3408                           const void *bytes, Py_ssize_t size)
3409 {
3410     char *str = (char *)ptr;
3411 
3412     str = _PyBytesWriter_Prepare(writer, str, size);
3413     if (str == NULL)
3414         return NULL;
3415 
3416     memcpy(str, bytes, size);
3417     str += size;
3418 
3419     return str;
3420 }
3421