1 /* bytes object implementation */
2 
3 #define PY_SSIZE_T_CLEAN
4 
5 #include "Python.h"
6 #include "pycore_abstract.h"      // _PyIndex_Check()
7 #include "pycore_bytes_methods.h" // _Py_bytes_startswith()
8 #include "pycore_format.h"        // F_LJUST
9 #include "pycore_initconfig.h"    // _PyStatus_OK()
10 #include "pycore_object.h"        // _PyObject_GC_TRACK
11 #include "pycore_pymem.h"         // PYMEM_CLEANBYTE
12 
13 #include "pystrhex.h"
14 #include <stddef.h>
15 
16 /*[clinic input]
17 class bytes "PyBytesObject *" "&PyBytes_Type"
18 [clinic start generated code]*/
19 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
20 
21 #include "clinic/bytesobject.c.h"
22 
23 _Py_IDENTIFIER(__bytes__);
24 
25 /* PyBytesObject_SIZE gives the basic size of a bytes object; any memory allocation
26    for a bytes object of length n should request PyBytesObject_SIZE + n bytes.
27 
28    Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
29    3 or 7 bytes per bytes object allocation on a typical system.
30 */
31 #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
32 
33 /* Forward declaration */
34 Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
35                                                    char *str);
36 
37 
38 static struct _Py_bytes_state*
get_bytes_state(void)39 get_bytes_state(void)
40 {
41     PyInterpreterState *interp = _PyInterpreterState_GET();
42     return &interp->bytes;
43 }
44 
45 
46 // Return a borrowed reference to the empty bytes string singleton.
bytes_get_empty(void)47 static inline PyObject* bytes_get_empty(void)
48 {
49     struct _Py_bytes_state *state = get_bytes_state();
50     // bytes_get_empty() must not be called before _PyBytes_Init()
51     // or after _PyBytes_Fini()
52     assert(state->empty_string != NULL);
53     return state->empty_string;
54 }
55 
56 
57 // Return a strong reference to the empty bytes string singleton.
bytes_new_empty(void)58 static inline PyObject* bytes_new_empty(void)
59 {
60     PyObject *empty = bytes_get_empty();
61     Py_INCREF(empty);
62     return (PyObject *)empty;
63 }
64 
65 
66 static int
bytes_create_empty_string_singleton(struct _Py_bytes_state * state)67 bytes_create_empty_string_singleton(struct _Py_bytes_state *state)
68 {
69     // Create the empty bytes string singleton
70     PyBytesObject *op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE);
71     if (op == NULL) {
72         return -1;
73     }
74     _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, 0);
75     op->ob_shash = -1;
76     op->ob_sval[0] = '\0';
77 
78     assert(state->empty_string == NULL);
79     state->empty_string = (PyObject *)op;
80     return 0;
81 }
82 
83 
84 /*
85    For PyBytes_FromString(), the parameter `str' points to a null-terminated
86    string containing exactly `size' bytes.
87 
88    For PyBytes_FromStringAndSize(), the parameter `str' is
89    either NULL or else points to a string containing at least `size' bytes.
90    For PyBytes_FromStringAndSize(), the string in the `str' parameter does
91    not have to be null-terminated.  (Therefore it is safe to construct a
92    substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
93    If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
94    bytes (setting the last byte to the null terminating character) and you can
95    fill in the data yourself.  If `str' is non-NULL then the resulting
96    PyBytes object must be treated as immutable and you must not fill in nor
97    alter the data yourself, since the strings may be shared.
98 
99    The PyObject member `op->ob_size', which denotes the number of "extra
100    items" in a variable-size object, will contain the number of bytes
101    allocated for string data, not counting the null terminating character.
102    It is therefore equal to the `size' parameter (for
103    PyBytes_FromStringAndSize()) or the length of the string in the `str'
104    parameter (for PyBytes_FromString()).
105 */
106 static PyObject *
_PyBytes_FromSize(Py_ssize_t size,int use_calloc)107 _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
108 {
109     PyBytesObject *op;
110     assert(size >= 0);
111 
112     if (size == 0) {
113         return bytes_new_empty();
114     }
115 
116     if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
117         PyErr_SetString(PyExc_OverflowError,
118                         "byte string is too large");
119         return NULL;
120     }
121 
122     /* Inline PyObject_NewVar */
123     if (use_calloc)
124         op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
125     else
126         op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
127     if (op == NULL) {
128         return PyErr_NoMemory();
129     }
130     _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
131     op->ob_shash = -1;
132     if (!use_calloc) {
133         op->ob_sval[size] = '\0';
134     }
135     return (PyObject *) op;
136 }
137 
138 PyObject *
PyBytes_FromStringAndSize(const char * str,Py_ssize_t size)139 PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
140 {
141     PyBytesObject *op;
142     if (size < 0) {
143         PyErr_SetString(PyExc_SystemError,
144             "Negative size passed to PyBytes_FromStringAndSize");
145         return NULL;
146     }
147     if (size == 1 && str != NULL) {
148         struct _Py_bytes_state *state = get_bytes_state();
149         op = state->characters[*str & UCHAR_MAX];
150         if (op != NULL) {
151             Py_INCREF(op);
152             return (PyObject *)op;
153         }
154     }
155     if (size == 0) {
156         return bytes_new_empty();
157     }
158 
159     op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
160     if (op == NULL)
161         return NULL;
162     if (str == NULL)
163         return (PyObject *) op;
164 
165     memcpy(op->ob_sval, str, size);
166     /* share short strings */
167     if (size == 1) {
168         struct _Py_bytes_state *state = get_bytes_state();
169         Py_INCREF(op);
170         state->characters[*str & UCHAR_MAX] = op;
171     }
172     return (PyObject *) op;
173 }
174 
175 PyObject *
PyBytes_FromString(const char * str)176 PyBytes_FromString(const char *str)
177 {
178     size_t size;
179     PyBytesObject *op;
180 
181     assert(str != NULL);
182     size = strlen(str);
183     if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
184         PyErr_SetString(PyExc_OverflowError,
185             "byte string is too long");
186         return NULL;
187     }
188 
189     struct _Py_bytes_state *state = get_bytes_state();
190     if (size == 0) {
191         return bytes_new_empty();
192     }
193     else if (size == 1) {
194         op = state->characters[*str & UCHAR_MAX];
195         if (op != NULL) {
196             Py_INCREF(op);
197             return (PyObject *)op;
198         }
199     }
200 
201     /* Inline PyObject_NewVar */
202     op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
203     if (op == NULL) {
204         return PyErr_NoMemory();
205     }
206     _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
207     op->ob_shash = -1;
208     memcpy(op->ob_sval, str, size+1);
209     /* share short strings */
210     if (size == 1) {
211         assert(state->characters[*str & UCHAR_MAX] == NULL);
212         Py_INCREF(op);
213         state->characters[*str & UCHAR_MAX] = op;
214     }
215     return (PyObject *) op;
216 }
217 
/* Build a bytes object from the printf-style `format` and the arguments
   in `vargs`.

   Conversions handled by the switch below: %c, %d, %u, %i, %x, %s, %p and
   %%.  The 'l' and 'z' length flags are recognised only directly in front
   of 'd' or 'u'.  A width is parsed but ignored; a precision is parsed and
   used only by %s, where it caps the number of bytes copied.

   An unrecognised conversion stops processing: the remainder of the format,
   starting at its '%', is copied verbatim and the result is returned.

   Returns the object produced by _PyBytesWriter_Finish(), or NULL on
   failure. */
PyObject *
PyBytes_FromFormatV(const char *format, va_list vargs)
{
    char *s;
    const char *f;
    const char *p;
    Py_ssize_t prec;
    int longflag;
    int size_tflag;
    /* Longest 64-bit formatted numbers:
       - "18446744073709551615\0" (21 bytes)
       - "-9223372036854775808\0" (21 bytes)
       Decimal takes the most space (it isn't enough for octal.)

       Longest 64-bit pointer representation:
       "0xffffffffffffffff\0" (19 bytes). */
    char buffer[21];
    _PyBytesWriter writer;

    _PyBytesWriter_Init(&writer);

    /* Start with room for the format string itself; each conversion below
       adjusts writer.min_size for the text that replaces the specifier. */
    s = _PyBytesWriter_Alloc(&writer, strlen(format));
    if (s == NULL)
        return NULL;
    writer.overallocate = 1;

/* Append the null-terminated string `str` to the writer, jumping to the
   error label if the writer fails. */
#define WRITE_BYTES(str) \
    do { \
        s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
        if (s == NULL) \
            goto error; \
    } while (0)

    for (f = format; *f; f++) {
        /* Literal byte: copy as is. */
        if (*f != '%') {
            *s++ = *f;
            continue;
        }

        /* `p` remembers where the specifier starts (its '%'). */
        p = f++;

        /* ignore the width (ex: 10 in "%10s") */
        while (Py_ISDIGIT(*f))
            f++;

        /* parse the precision (ex: 10 in "%.10s") */
        prec = 0;
        if (*f == '.') {
            f++;
            for (; Py_ISDIGIT(*f); f++) {
                prec = (prec * 10) + (*f - '0');
            }
        }

        /* skip anything else up to a letter, a '%' or the end of the format */
        while (*f && *f != '%' && !Py_ISALPHA(*f))
            f++;

        /* handle the long flag ('l'), but only for %ld and %lu.
           others can be added when necessary. */
        longflag = 0;
        if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
            longflag = 1;
            ++f;
        }

        /* handle the size_t flag ('z'). */
        size_tflag = 0;
        if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
            size_tflag = 1;
            ++f;
        }

        /* subtract bytes preallocated for the format string
           (ex: 2 for "%s") */
        writer.min_size -= (f - p + 1);

        switch (*f) {
        case 'c':
        {
            /* The argument is read as an int and must fit in one byte. */
            int c = va_arg(vargs, int);
            if (c < 0 || c > 255) {
                PyErr_SetString(PyExc_OverflowError,
                                "PyBytes_FromFormatV(): %c format "
                                "expects an integer in range [0; 255]");
                goto error;
            }
            /* One output byte is written directly, so account for it. */
            writer.min_size++;
            *s++ = (unsigned char)c;
            break;
        }

        case 'd':
            /* Signed decimal: long, Py_ssize_t or int depending on flag. */
            if (longflag) {
                sprintf(buffer, "%ld", va_arg(vargs, long));
            }
            else if (size_tflag) {
                sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
            }
            else {
                sprintf(buffer, "%d", va_arg(vargs, int));
            }
            assert(strlen(buffer) < sizeof(buffer));
            WRITE_BYTES(buffer);
            break;

        case 'u':
            /* Unsigned decimal: unsigned long, size_t or unsigned int. */
            if (longflag) {
                sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
            }
            else if (size_tflag) {
                sprintf(buffer, "%zu", va_arg(vargs, size_t));
            }
            else {
                sprintf(buffer, "%u", va_arg(vargs, unsigned int));
            }
            assert(strlen(buffer) < sizeof(buffer));
            WRITE_BYTES(buffer);
            break;

        case 'i':
            sprintf(buffer, "%i", va_arg(vargs, int));
            assert(strlen(buffer) < sizeof(buffer));
            WRITE_BYTES(buffer);
            break;

        case 'x':
            /* The argument is fetched as an int and printed in hex. */
            sprintf(buffer, "%x", va_arg(vargs, int));
            assert(strlen(buffer) < sizeof(buffer));
            WRITE_BYTES(buffer);
            break;

        case 's':
        {
            Py_ssize_t i;

            /* `p` is reused here for the string argument. */
            p = va_arg(vargs, const char*);
            if (prec <= 0) {
                /* No (or zero) precision: copy the whole string. */
                i = strlen(p);
            }
            else {
                /* Copy at most `prec` bytes, stopping at the terminator. */
                i = 0;
                while (i < prec && p[i]) {
                    i++;
                }
            }
            s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
            if (s == NULL)
                goto error;
            break;
        }

        case 'p':
            sprintf(buffer, "%p", va_arg(vargs, void*));
            assert(strlen(buffer) < sizeof(buffer));
            /* %p is ill-defined:  ensure leading 0x. */
            if (buffer[1] == 'X')
                buffer[1] = 'x';
            else if (buffer[1] != 'x') {
                /* Shift the text (terminator included) right by two bytes
                   to make room for the "0x" prefix. */
                memmove(buffer+2, buffer, strlen(buffer)+1);
                buffer[0] = '0';
                buffer[1] = 'x';
            }
            WRITE_BYTES(buffer);
            break;

        case '%':
            /* "%%" produces a single literal '%'. */
            writer.min_size++;
            *s++ = '%';
            break;

        default:
            if (*f == 0) {
                /* fix min_size if we reached the end of the format string */
                writer.min_size++;
            }

            /* invalid format string: copy unformatted string and exit */
            WRITE_BYTES(p);
            return _PyBytesWriter_Finish(&writer, s);
        }
    }

#undef WRITE_BYTES

    return _PyBytesWriter_Finish(&writer, s);

 error:
    /* Release the writer's buffer before reporting failure. */
    _PyBytesWriter_Dealloc(&writer);
    return NULL;
}
408 
409 PyObject *
PyBytes_FromFormat(const char * format,...)410 PyBytes_FromFormat(const char *format, ...)
411 {
412     PyObject* ret;
413     va_list vargs;
414 
415 #ifdef HAVE_STDARG_PROTOTYPES
416     va_start(vargs, format);
417 #else
418     va_start(vargs);
419 #endif
420     ret = PyBytes_FromFormatV(format, vargs);
421     va_end(vargs);
422     return ret;
423 }
424 
425 /* Helpers for formatstring */
426 
427 Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject * args,Py_ssize_t arglen,Py_ssize_t * p_argidx)428 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
429 {
430     Py_ssize_t argidx = *p_argidx;
431     if (argidx < arglen) {
432         (*p_argidx)++;
433         if (arglen < 0)
434             return args;
435         else
436             return PyTuple_GetItem(args, argidx);
437     }
438     PyErr_SetString(PyExc_TypeError,
439                     "not enough arguments for format string");
440     return NULL;
441 }
442 
443 /* Returns a new reference to a PyBytes object, or NULL on failure. */
444 
445 static char*
formatfloat(PyObject * v,int flags,int prec,int type,PyObject ** p_result,_PyBytesWriter * writer,char * str)446 formatfloat(PyObject *v, int flags, int prec, int type,
447             PyObject **p_result, _PyBytesWriter *writer, char *str)
448 {
449     char *p;
450     PyObject *result;
451     double x;
452     size_t len;
453 
454     x = PyFloat_AsDouble(v);
455     if (x == -1.0 && PyErr_Occurred()) {
456         PyErr_Format(PyExc_TypeError, "float argument required, "
457                      "not %.200s", Py_TYPE(v)->tp_name);
458         return NULL;
459     }
460 
461     if (prec < 0)
462         prec = 6;
463 
464     p = PyOS_double_to_string(x, type, prec,
465                               (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
466 
467     if (p == NULL)
468         return NULL;
469 
470     len = strlen(p);
471     if (writer != NULL) {
472         str = _PyBytesWriter_Prepare(writer, str, len);
473         if (str == NULL)
474             return NULL;
475         memcpy(str, p, len);
476         PyMem_Free(p);
477         str += len;
478         return str;
479     }
480 
481     result = PyBytes_FromStringAndSize(p, len);
482     PyMem_Free(p);
483     *p_result = result;
484     return result != NULL ? str : NULL;
485 }
486 
487 static PyObject *
formatlong(PyObject * v,int flags,int prec,int type)488 formatlong(PyObject *v, int flags, int prec, int type)
489 {
490     PyObject *result, *iobj;
491     if (type == 'i')
492         type = 'd';
493     if (PyLong_Check(v))
494         return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
495     if (PyNumber_Check(v)) {
496         /* make sure number is a type of integer for o, x, and X */
497         if (type == 'o' || type == 'x' || type == 'X')
498             iobj = _PyNumber_Index(v);
499         else
500             iobj = PyNumber_Long(v);
501         if (iobj != NULL) {
502             assert(PyLong_Check(iobj));
503             result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
504             Py_DECREF(iobj);
505             return result;
506         }
507         if (!PyErr_ExceptionMatches(PyExc_TypeError))
508             return NULL;
509     }
510     PyErr_Format(PyExc_TypeError,
511         "%%%c format: %s is required, not %.200s", type,
512         (type == 'o' || type == 'x' || type == 'X') ? "an integer"
513                                                     : "a real number",
514         Py_TYPE(v)->tp_name);
515     return NULL;
516 }
517 
518 static int
byte_converter(PyObject * arg,char * p)519 byte_converter(PyObject *arg, char *p)
520 {
521     if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
522         *p = PyBytes_AS_STRING(arg)[0];
523         return 1;
524     }
525     else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
526         *p = PyByteArray_AS_STRING(arg)[0];
527         return 1;
528     }
529     else {
530         int overflow;
531         long ival = PyLong_AsLongAndOverflow(arg, &overflow);
532         if (ival == -1 && PyErr_Occurred()) {
533             if (PyErr_ExceptionMatches(PyExc_TypeError)) {
534                 goto onError;
535             }
536             return 0;
537         }
538         if (!(0 <= ival && ival <= 255)) {
539             /* this includes an overflow in converting to C long */
540             PyErr_SetString(PyExc_OverflowError,
541                             "%c arg not in range(256)");
542             return 0;
543         }
544         *p = (char)ival;
545         return 1;
546     }
547   onError:
548     PyErr_SetString(PyExc_TypeError,
549         "%c requires an integer in range(256) or a single byte");
550     return 0;
551 }
552 
553 static PyObject *_PyBytes_FromBuffer(PyObject *x);
554 
555 static PyObject *
format_obj(PyObject * v,const char ** pbuf,Py_ssize_t * plen)556 format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
557 {
558     PyObject *func, *result;
559     /* is it a bytes object? */
560     if (PyBytes_Check(v)) {
561         *pbuf = PyBytes_AS_STRING(v);
562         *plen = PyBytes_GET_SIZE(v);
563         Py_INCREF(v);
564         return v;
565     }
566     if (PyByteArray_Check(v)) {
567         *pbuf = PyByteArray_AS_STRING(v);
568         *plen = PyByteArray_GET_SIZE(v);
569         Py_INCREF(v);
570         return v;
571     }
572     /* does it support __bytes__? */
573     func = _PyObject_LookupSpecial(v, &PyId___bytes__);
574     if (func != NULL) {
575         result = _PyObject_CallNoArg(func);
576         Py_DECREF(func);
577         if (result == NULL)
578             return NULL;
579         if (!PyBytes_Check(result)) {
580             PyErr_Format(PyExc_TypeError,
581                          "__bytes__ returned non-bytes (type %.200s)",
582                          Py_TYPE(result)->tp_name);
583             Py_DECREF(result);
584             return NULL;
585         }
586         *pbuf = PyBytes_AS_STRING(result);
587         *plen = PyBytes_GET_SIZE(result);
588         return result;
589     }
590     /* does it support buffer protocol? */
591     if (PyObject_CheckBuffer(v)) {
592         /* maybe we can avoid making a copy of the buffer object here? */
593         result = _PyBytes_FromBuffer(v);
594         if (result == NULL)
595             return NULL;
596         *pbuf = PyBytes_AS_STRING(result);
597         *plen = PyBytes_GET_SIZE(result);
598         return result;
599     }
600     PyErr_Format(PyExc_TypeError,
601                  "%%b requires a bytes-like object, "
602                  "or an object that implements __bytes__, not '%.100s'",
603                  Py_TYPE(v)->tp_name);
604     return NULL;
605 }
606 
607 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
608 
609 PyObject *
_PyBytes_FormatEx(const char * format,Py_ssize_t format_len,PyObject * args,int use_bytearray)610 _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
611                   PyObject *args, int use_bytearray)
612 {
613     const char *fmt;
614     char *res;
615     Py_ssize_t arglen, argidx;
616     Py_ssize_t fmtcnt;
617     int args_owned = 0;
618     PyObject *dict = NULL;
619     _PyBytesWriter writer;
620 
621     if (args == NULL) {
622         PyErr_BadInternalCall();
623         return NULL;
624     }
625     fmt = format;
626     fmtcnt = format_len;
627 
628     _PyBytesWriter_Init(&writer);
629     writer.use_bytearray = use_bytearray;
630 
631     res = _PyBytesWriter_Alloc(&writer, fmtcnt);
632     if (res == NULL)
633         return NULL;
634     if (!use_bytearray)
635         writer.overallocate = 1;
636 
637     if (PyTuple_Check(args)) {
638         arglen = PyTuple_GET_SIZE(args);
639         argidx = 0;
640     }
641     else {
642         arglen = -1;
643         argidx = -2;
644     }
645     if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
646         !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
647         !PyByteArray_Check(args)) {
648             dict = args;
649     }
650 
651     while (--fmtcnt >= 0) {
652         if (*fmt != '%') {
653             Py_ssize_t len;
654             char *pos;
655 
656             pos = (char *)memchr(fmt + 1, '%', fmtcnt);
657             if (pos != NULL)
658                 len = pos - fmt;
659             else
660                 len = fmtcnt + 1;
661             assert(len != 0);
662 
663             memcpy(res, fmt, len);
664             res += len;
665             fmt += len;
666             fmtcnt -= (len - 1);
667         }
668         else {
669             /* Got a format specifier */
670             int flags = 0;
671             Py_ssize_t width = -1;
672             int prec = -1;
673             int c = '\0';
674             int fill;
675             PyObject *v = NULL;
676             PyObject *temp = NULL;
677             const char *pbuf = NULL;
678             int sign;
679             Py_ssize_t len = 0;
680             char onechar; /* For byte_converter() */
681             Py_ssize_t alloc;
682 
683             fmt++;
684             if (*fmt == '%') {
685                 *res++ = '%';
686                 fmt++;
687                 fmtcnt--;
688                 continue;
689             }
690             if (*fmt == '(') {
691                 const char *keystart;
692                 Py_ssize_t keylen;
693                 PyObject *key;
694                 int pcount = 1;
695 
696                 if (dict == NULL) {
697                     PyErr_SetString(PyExc_TypeError,
698                              "format requires a mapping");
699                     goto error;
700                 }
701                 ++fmt;
702                 --fmtcnt;
703                 keystart = fmt;
704                 /* Skip over balanced parentheses */
705                 while (pcount > 0 && --fmtcnt >= 0) {
706                     if (*fmt == ')')
707                         --pcount;
708                     else if (*fmt == '(')
709                         ++pcount;
710                     fmt++;
711                 }
712                 keylen = fmt - keystart - 1;
713                 if (fmtcnt < 0 || pcount > 0) {
714                     PyErr_SetString(PyExc_ValueError,
715                                "incomplete format key");
716                     goto error;
717                 }
718                 key = PyBytes_FromStringAndSize(keystart,
719                                                  keylen);
720                 if (key == NULL)
721                     goto error;
722                 if (args_owned) {
723                     Py_DECREF(args);
724                     args_owned = 0;
725                 }
726                 args = PyObject_GetItem(dict, key);
727                 Py_DECREF(key);
728                 if (args == NULL) {
729                     goto error;
730                 }
731                 args_owned = 1;
732                 arglen = -1;
733                 argidx = -2;
734             }
735 
736             /* Parse flags. Example: "%+i" => flags=F_SIGN. */
737             while (--fmtcnt >= 0) {
738                 switch (c = *fmt++) {
739                 case '-': flags |= F_LJUST; continue;
740                 case '+': flags |= F_SIGN; continue;
741                 case ' ': flags |= F_BLANK; continue;
742                 case '#': flags |= F_ALT; continue;
743                 case '0': flags |= F_ZERO; continue;
744                 }
745                 break;
746             }
747 
748             /* Parse width. Example: "%10s" => width=10 */
749             if (c == '*') {
750                 v = getnextarg(args, arglen, &argidx);
751                 if (v == NULL)
752                     goto error;
753                 if (!PyLong_Check(v)) {
754                     PyErr_SetString(PyExc_TypeError,
755                                     "* wants int");
756                     goto error;
757                 }
758                 width = PyLong_AsSsize_t(v);
759                 if (width == -1 && PyErr_Occurred())
760                     goto error;
761                 if (width < 0) {
762                     flags |= F_LJUST;
763                     width = -width;
764                 }
765                 if (--fmtcnt >= 0)
766                     c = *fmt++;
767             }
768             else if (c >= 0 && isdigit(c)) {
769                 width = c - '0';
770                 while (--fmtcnt >= 0) {
771                     c = Py_CHARMASK(*fmt++);
772                     if (!isdigit(c))
773                         break;
774                     if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
775                         PyErr_SetString(
776                             PyExc_ValueError,
777                             "width too big");
778                         goto error;
779                     }
780                     width = width*10 + (c - '0');
781                 }
782             }
783 
784             /* Parse precision. Example: "%.3f" => prec=3 */
785             if (c == '.') {
786                 prec = 0;
787                 if (--fmtcnt >= 0)
788                     c = *fmt++;
789                 if (c == '*') {
790                     v = getnextarg(args, arglen, &argidx);
791                     if (v == NULL)
792                         goto error;
793                     if (!PyLong_Check(v)) {
794                         PyErr_SetString(
795                             PyExc_TypeError,
796                             "* wants int");
797                         goto error;
798                     }
799                     prec = _PyLong_AsInt(v);
800                     if (prec == -1 && PyErr_Occurred())
801                         goto error;
802                     if (prec < 0)
803                         prec = 0;
804                     if (--fmtcnt >= 0)
805                         c = *fmt++;
806                 }
807                 else if (c >= 0 && isdigit(c)) {
808                     prec = c - '0';
809                     while (--fmtcnt >= 0) {
810                         c = Py_CHARMASK(*fmt++);
811                         if (!isdigit(c))
812                             break;
813                         if (prec > (INT_MAX - ((int)c - '0')) / 10) {
814                             PyErr_SetString(
815                                 PyExc_ValueError,
816                                 "prec too big");
817                             goto error;
818                         }
819                         prec = prec*10 + (c - '0');
820                     }
821                 }
822             } /* prec */
823             if (fmtcnt >= 0) {
824                 if (c == 'h' || c == 'l' || c == 'L') {
825                     if (--fmtcnt >= 0)
826                         c = *fmt++;
827                 }
828             }
829             if (fmtcnt < 0) {
830                 PyErr_SetString(PyExc_ValueError,
831                                 "incomplete format");
832                 goto error;
833             }
834             v = getnextarg(args, arglen, &argidx);
835             if (v == NULL)
836                 goto error;
837 
838             if (fmtcnt == 0) {
839                 /* last write: disable writer overallocation */
840                 writer.overallocate = 0;
841             }
842 
843             sign = 0;
844             fill = ' ';
845             switch (c) {
846             case 'r':
847                 // %r is only for 2/3 code; 3 only code should use %a
848             case 'a':
849                 temp = PyObject_ASCII(v);
850                 if (temp == NULL)
851                     goto error;
852                 assert(PyUnicode_IS_ASCII(temp));
853                 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
854                 len = PyUnicode_GET_LENGTH(temp);
855                 if (prec >= 0 && len > prec)
856                     len = prec;
857                 break;
858 
859             case 's':
860                 // %s is only for 2/3 code; 3 only code should use %b
861             case 'b':
862                 temp = format_obj(v, &pbuf, &len);
863                 if (temp == NULL)
864                     goto error;
865                 if (prec >= 0 && len > prec)
866                     len = prec;
867                 break;
868 
869             case 'i':
870             case 'd':
871             case 'u':
872             case 'o':
873             case 'x':
874             case 'X':
875                 if (PyLong_CheckExact(v)
876                     && width == -1 && prec == -1
877                     && !(flags & (F_SIGN | F_BLANK))
878                     && c != 'X')
879                 {
880                     /* Fast path */
881                     int alternate = flags & F_ALT;
882                     int base;
883 
884                     switch(c)
885                     {
886                         default:
887                             Py_UNREACHABLE();
888                         case 'd':
889                         case 'i':
890                         case 'u':
891                             base = 10;
892                             break;
893                         case 'o':
894                             base = 8;
895                             break;
896                         case 'x':
897                         case 'X':
898                             base = 16;
899                             break;
900                     }
901 
902                     /* Fast path */
903                     writer.min_size -= 2; /* size preallocated for "%d" */
904                     res = _PyLong_FormatBytesWriter(&writer, res,
905                                                     v, base, alternate);
906                     if (res == NULL)
907                         goto error;
908                     continue;
909                 }
910 
911                 temp = formatlong(v, flags, prec, c);
912                 if (!temp)
913                     goto error;
914                 assert(PyUnicode_IS_ASCII(temp));
915                 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
916                 len = PyUnicode_GET_LENGTH(temp);
917                 sign = 1;
918                 if (flags & F_ZERO)
919                     fill = '0';
920                 break;
921 
922             case 'e':
923             case 'E':
924             case 'f':
925             case 'F':
926             case 'g':
927             case 'G':
928                 if (width == -1 && prec == -1
929                     && !(flags & (F_SIGN | F_BLANK)))
930                 {
931                     /* Fast path */
932                     writer.min_size -= 2; /* size preallocated for "%f" */
933                     res = formatfloat(v, flags, prec, c, NULL, &writer, res);
934                     if (res == NULL)
935                         goto error;
936                     continue;
937                 }
938 
939                 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
940                     goto error;
941                 pbuf = PyBytes_AS_STRING(temp);
942                 len = PyBytes_GET_SIZE(temp);
943                 sign = 1;
944                 if (flags & F_ZERO)
945                     fill = '0';
946                 break;
947 
948             case 'c':
949                 pbuf = &onechar;
950                 len = byte_converter(v, &onechar);
951                 if (!len)
952                     goto error;
953                 if (width == -1) {
954                     /* Fast path */
955                     *res++ = onechar;
956                     continue;
957                 }
958                 break;
959 
960             default:
961                 PyErr_Format(PyExc_ValueError,
962                   "unsupported format character '%c' (0x%x) "
963                   "at index %zd",
964                   c, c,
965                   (Py_ssize_t)(fmt - 1 - format));
966                 goto error;
967             }
968 
969             if (sign) {
970                 if (*pbuf == '-' || *pbuf == '+') {
971                     sign = *pbuf++;
972                     len--;
973                 }
974                 else if (flags & F_SIGN)
975                     sign = '+';
976                 else if (flags & F_BLANK)
977                     sign = ' ';
978                 else
979                     sign = 0;
980             }
981             if (width < len)
982                 width = len;
983 
984             alloc = width;
985             if (sign != 0 && len == width)
986                 alloc++;
987             /* 2: size preallocated for %s */
988             if (alloc > 2) {
989                 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
990                 if (res == NULL)
991                     goto error;
992             }
993 #ifndef NDEBUG
994             char *before = res;
995 #endif
996 
997             /* Write the sign if needed */
998             if (sign) {
999                 if (fill != ' ')
1000                     *res++ = sign;
1001                 if (width > len)
1002                     width--;
1003             }
1004 
1005             /* Write the numeric prefix for "x", "X" and "o" formats
1006                if the alternate form is used.
1007                For example, write "0x" for the "%#x" format. */
1008             if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1009                 assert(pbuf[0] == '0');
1010                 assert(pbuf[1] == c);
1011                 if (fill != ' ') {
1012                     *res++ = *pbuf++;
1013                     *res++ = *pbuf++;
1014                 }
1015                 width -= 2;
1016                 if (width < 0)
1017                     width = 0;
1018                 len -= 2;
1019             }
1020 
1021             /* Pad left with the fill character if needed */
1022             if (width > len && !(flags & F_LJUST)) {
1023                 memset(res, fill, width - len);
1024                 res += (width - len);
1025                 width = len;
1026             }
1027 
1028             /* If padding with spaces: write sign if needed and/or numeric
1029                prefix if the alternate form is used */
1030             if (fill == ' ') {
1031                 if (sign)
1032                     *res++ = sign;
1033                 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1034                     assert(pbuf[0] == '0');
1035                     assert(pbuf[1] == c);
1036                     *res++ = *pbuf++;
1037                     *res++ = *pbuf++;
1038                 }
1039             }
1040 
1041             /* Copy bytes */
1042             memcpy(res, pbuf, len);
1043             res += len;
1044 
1045             /* Pad right with the fill character if needed */
1046             if (width > len) {
1047                 memset(res, ' ', width - len);
1048                 res += (width - len);
1049             }
1050 
1051             if (dict && (argidx < arglen)) {
1052                 PyErr_SetString(PyExc_TypeError,
1053                            "not all arguments converted during bytes formatting");
1054                 Py_XDECREF(temp);
1055                 goto error;
1056             }
1057             Py_XDECREF(temp);
1058 
1059 #ifndef NDEBUG
1060             /* check that we computed the exact size for this write */
1061             assert((res - before) == alloc);
1062 #endif
1063         } /* '%' */
1064 
1065         /* If overallocation was disabled, ensure that it was the last
1066            write. Otherwise, we missed an optimization */
1067         assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
1068     } /* until end */
1069 
1070     if (argidx < arglen && !dict) {
1071         PyErr_SetString(PyExc_TypeError,
1072                         "not all arguments converted during bytes formatting");
1073         goto error;
1074     }
1075 
1076     if (args_owned) {
1077         Py_DECREF(args);
1078     }
1079     return _PyBytesWriter_Finish(&writer, res);
1080 
1081  error:
1082     _PyBytesWriter_Dealloc(&writer);
1083     if (args_owned) {
1084         Py_DECREF(args);
1085     }
1086     return NULL;
1087 }
1088 
1089 /* Unescape a backslash-escaped string. */
_PyBytes_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,const char ** first_invalid_escape)1090 PyObject *_PyBytes_DecodeEscape(const char *s,
1091                                 Py_ssize_t len,
1092                                 const char *errors,
1093                                 const char **first_invalid_escape)
1094 {
1095     int c;
1096     char *p;
1097     const char *end;
1098     _PyBytesWriter writer;
1099 
1100     _PyBytesWriter_Init(&writer);
1101 
1102     p = _PyBytesWriter_Alloc(&writer, len);
1103     if (p == NULL)
1104         return NULL;
1105     writer.overallocate = 1;
1106 
1107     *first_invalid_escape = NULL;
1108 
1109     end = s + len;
1110     while (s < end) {
1111         if (*s != '\\') {
1112             *p++ = *s++;
1113             continue;
1114         }
1115 
1116         s++;
1117         if (s == end) {
1118             PyErr_SetString(PyExc_ValueError,
1119                             "Trailing \\ in string");
1120             goto failed;
1121         }
1122 
1123         switch (*s++) {
1124         /* XXX This assumes ASCII! */
1125         case '\n': break;
1126         case '\\': *p++ = '\\'; break;
1127         case '\'': *p++ = '\''; break;
1128         case '\"': *p++ = '\"'; break;
1129         case 'b': *p++ = '\b'; break;
1130         case 'f': *p++ = '\014'; break; /* FF */
1131         case 't': *p++ = '\t'; break;
1132         case 'n': *p++ = '\n'; break;
1133         case 'r': *p++ = '\r'; break;
1134         case 'v': *p++ = '\013'; break; /* VT */
1135         case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1136         case '0': case '1': case '2': case '3':
1137         case '4': case '5': case '6': case '7':
1138             c = s[-1] - '0';
1139             if (s < end && '0' <= *s && *s <= '7') {
1140                 c = (c<<3) + *s++ - '0';
1141                 if (s < end && '0' <= *s && *s <= '7')
1142                     c = (c<<3) + *s++ - '0';
1143             }
1144             *p++ = c;
1145             break;
1146         case 'x':
1147             if (s+1 < end) {
1148                 int digit1, digit2;
1149                 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1150                 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1151                 if (digit1 < 16 && digit2 < 16) {
1152                     *p++ = (unsigned char)((digit1 << 4) + digit2);
1153                     s += 2;
1154                     break;
1155                 }
1156             }
1157             /* invalid hexadecimal digits */
1158 
1159             if (!errors || strcmp(errors, "strict") == 0) {
1160                 PyErr_Format(PyExc_ValueError,
1161                              "invalid \\x escape at position %zd",
1162                              s - 2 - (end - len));
1163                 goto failed;
1164             }
1165             if (strcmp(errors, "replace") == 0) {
1166                 *p++ = '?';
1167             } else if (strcmp(errors, "ignore") == 0)
1168                 /* do nothing */;
1169             else {
1170                 PyErr_Format(PyExc_ValueError,
1171                              "decoding error; unknown "
1172                              "error handling code: %.400s",
1173                              errors);
1174                 goto failed;
1175             }
1176             /* skip \x */
1177             if (s < end && Py_ISXDIGIT(s[0]))
1178                 s++; /* and a hexdigit */
1179             break;
1180 
1181         default:
1182             if (*first_invalid_escape == NULL) {
1183                 *first_invalid_escape = s-1; /* Back up one char, since we've
1184                                                 already incremented s. */
1185             }
1186             *p++ = '\\';
1187             s--;
1188         }
1189     }
1190 
1191     return _PyBytesWriter_Finish(&writer, p);
1192 
1193   failed:
1194     _PyBytesWriter_Dealloc(&writer);
1195     return NULL;
1196 }
1197 
PyBytes_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,Py_ssize_t Py_UNUSED (unicode),const char * Py_UNUSED (recode_encoding))1198 PyObject *PyBytes_DecodeEscape(const char *s,
1199                                 Py_ssize_t len,
1200                                 const char *errors,
1201                                 Py_ssize_t Py_UNUSED(unicode),
1202                                 const char *Py_UNUSED(recode_encoding))
1203 {
1204     const char* first_invalid_escape;
1205     PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
1206                                              &first_invalid_escape);
1207     if (result == NULL)
1208         return NULL;
1209     if (first_invalid_escape != NULL) {
1210         if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1211                              "invalid escape sequence '\\%c'",
1212                              (unsigned char)*first_invalid_escape) < 0) {
1213             Py_DECREF(result);
1214             return NULL;
1215         }
1216     }
1217     return result;
1218 
1219 }
1220 /* -------------------------------------------------------------------- */
1221 /* object api */
1222 
1223 Py_ssize_t
PyBytes_Size(PyObject * op)1224 PyBytes_Size(PyObject *op)
1225 {
1226     if (!PyBytes_Check(op)) {
1227         PyErr_Format(PyExc_TypeError,
1228              "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1229         return -1;
1230     }
1231     return Py_SIZE(op);
1232 }
1233 
1234 char *
PyBytes_AsString(PyObject * op)1235 PyBytes_AsString(PyObject *op)
1236 {
1237     if (!PyBytes_Check(op)) {
1238         PyErr_Format(PyExc_TypeError,
1239              "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1240         return NULL;
1241     }
1242     return ((PyBytesObject *)op)->ob_sval;
1243 }
1244 
1245 int
PyBytes_AsStringAndSize(PyObject * obj,char ** s,Py_ssize_t * len)1246 PyBytes_AsStringAndSize(PyObject *obj,
1247                          char **s,
1248                          Py_ssize_t *len)
1249 {
1250     if (s == NULL) {
1251         PyErr_BadInternalCall();
1252         return -1;
1253     }
1254 
1255     if (!PyBytes_Check(obj)) {
1256         PyErr_Format(PyExc_TypeError,
1257              "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1258         return -1;
1259     }
1260 
1261     *s = PyBytes_AS_STRING(obj);
1262     if (len != NULL)
1263         *len = PyBytes_GET_SIZE(obj);
1264     else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1265         PyErr_SetString(PyExc_ValueError,
1266                         "embedded null byte");
1267         return -1;
1268     }
1269     return 0;
1270 }
1271 
1272 /* -------------------------------------------------------------------- */
1273 /* Methods */
1274 
1275 #define STRINGLIB_GET_EMPTY() bytes_get_empty()
1276 
1277 #include "stringlib/stringdefs.h"
1278 
1279 #include "stringlib/fastsearch.h"
1280 #include "stringlib/count.h"
1281 #include "stringlib/find.h"
1282 #include "stringlib/join.h"
1283 #include "stringlib/partition.h"
1284 #include "stringlib/split.h"
1285 #include "stringlib/ctype.h"
1286 
1287 #include "stringlib/transmogrify.h"
1288 
1289 #undef STRINGLIB_GET_EMPTY
1290 
1291 PyObject *
PyBytes_Repr(PyObject * obj,int smartquotes)1292 PyBytes_Repr(PyObject *obj, int smartquotes)
1293 {
1294     PyBytesObject* op = (PyBytesObject*) obj;
1295     Py_ssize_t i, length = Py_SIZE(op);
1296     Py_ssize_t newsize, squotes, dquotes;
1297     PyObject *v;
1298     unsigned char quote;
1299     const unsigned char *s;
1300     Py_UCS1 *p;
1301 
1302     /* Compute size of output string */
1303     squotes = dquotes = 0;
1304     newsize = 3; /* b'' */
1305     s = (const unsigned char*)op->ob_sval;
1306     for (i = 0; i < length; i++) {
1307         Py_ssize_t incr = 1;
1308         switch(s[i]) {
1309         case '\'': squotes++; break;
1310         case '"':  dquotes++; break;
1311         case '\\': case '\t': case '\n': case '\r':
1312             incr = 2; break; /* \C */
1313         default:
1314             if (s[i] < ' ' || s[i] >= 0x7f)
1315                 incr = 4; /* \xHH */
1316         }
1317         if (newsize > PY_SSIZE_T_MAX - incr)
1318             goto overflow;
1319         newsize += incr;
1320     }
1321     quote = '\'';
1322     if (smartquotes && squotes && !dquotes)
1323         quote = '"';
1324     if (squotes && quote == '\'') {
1325         if (newsize > PY_SSIZE_T_MAX - squotes)
1326             goto overflow;
1327         newsize += squotes;
1328     }
1329 
1330     v = PyUnicode_New(newsize, 127);
1331     if (v == NULL) {
1332         return NULL;
1333     }
1334     p = PyUnicode_1BYTE_DATA(v);
1335 
1336     *p++ = 'b', *p++ = quote;
1337     for (i = 0; i < length; i++) {
1338         unsigned char c = op->ob_sval[i];
1339         if (c == quote || c == '\\')
1340             *p++ = '\\', *p++ = c;
1341         else if (c == '\t')
1342             *p++ = '\\', *p++ = 't';
1343         else if (c == '\n')
1344             *p++ = '\\', *p++ = 'n';
1345         else if (c == '\r')
1346             *p++ = '\\', *p++ = 'r';
1347         else if (c < ' ' || c >= 0x7f) {
1348             *p++ = '\\';
1349             *p++ = 'x';
1350             *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1351             *p++ = Py_hexdigits[c & 0xf];
1352         }
1353         else
1354             *p++ = c;
1355     }
1356     *p++ = quote;
1357     assert(_PyUnicode_CheckConsistency(v, 1));
1358     return v;
1359 
1360   overflow:
1361     PyErr_SetString(PyExc_OverflowError,
1362                     "bytes object is too large to make repr");
1363     return NULL;
1364 }
1365 
1366 static PyObject *
bytes_repr(PyObject * op)1367 bytes_repr(PyObject *op)
1368 {
1369     return PyBytes_Repr(op, 1);
1370 }
1371 
1372 static PyObject *
bytes_str(PyObject * op)1373 bytes_str(PyObject *op)
1374 {
1375     if (_Py_GetConfig()->bytes_warning) {
1376         if (PyErr_WarnEx(PyExc_BytesWarning,
1377                          "str() on a bytes instance", 1)) {
1378             return NULL;
1379         }
1380     }
1381     return bytes_repr(op);
1382 }
1383 
1384 static Py_ssize_t
bytes_length(PyBytesObject * a)1385 bytes_length(PyBytesObject *a)
1386 {
1387     return Py_SIZE(a);
1388 }
1389 
1390 /* This is also used by PyBytes_Concat() */
1391 static PyObject *
bytes_concat(PyObject * a,PyObject * b)1392 bytes_concat(PyObject *a, PyObject *b)
1393 {
1394     Py_buffer va, vb;
1395     PyObject *result = NULL;
1396 
1397     va.len = -1;
1398     vb.len = -1;
1399     if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1400         PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1401         PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1402                      Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
1403         goto done;
1404     }
1405 
1406     /* Optimize end cases */
1407     if (va.len == 0 && PyBytes_CheckExact(b)) {
1408         result = b;
1409         Py_INCREF(result);
1410         goto done;
1411     }
1412     if (vb.len == 0 && PyBytes_CheckExact(a)) {
1413         result = a;
1414         Py_INCREF(result);
1415         goto done;
1416     }
1417 
1418     if (va.len > PY_SSIZE_T_MAX - vb.len) {
1419         PyErr_NoMemory();
1420         goto done;
1421     }
1422 
1423     result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1424     if (result != NULL) {
1425         memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1426         memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1427     }
1428 
1429   done:
1430     if (va.len != -1)
1431         PyBuffer_Release(&va);
1432     if (vb.len != -1)
1433         PyBuffer_Release(&vb);
1434     return result;
1435 }
1436 
1437 static PyObject *
bytes_repeat(PyBytesObject * a,Py_ssize_t n)1438 bytes_repeat(PyBytesObject *a, Py_ssize_t n)
1439 {
1440     Py_ssize_t i;
1441     Py_ssize_t j;
1442     Py_ssize_t size;
1443     PyBytesObject *op;
1444     size_t nbytes;
1445     if (n < 0)
1446         n = 0;
1447     /* watch out for overflows:  the size can overflow int,
1448      * and the # of bytes needed can overflow size_t
1449      */
1450     if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1451         PyErr_SetString(PyExc_OverflowError,
1452             "repeated bytes are too long");
1453         return NULL;
1454     }
1455     size = Py_SIZE(a) * n;
1456     if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1457         Py_INCREF(a);
1458         return (PyObject *)a;
1459     }
1460     nbytes = (size_t)size;
1461     if (nbytes + PyBytesObject_SIZE <= nbytes) {
1462         PyErr_SetString(PyExc_OverflowError,
1463             "repeated bytes are too long");
1464         return NULL;
1465     }
1466     op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + nbytes);
1467     if (op == NULL) {
1468         return PyErr_NoMemory();
1469     }
1470     _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
1471     op->ob_shash = -1;
1472     op->ob_sval[size] = '\0';
1473     if (Py_SIZE(a) == 1 && n > 0) {
1474         memset(op->ob_sval, a->ob_sval[0] , n);
1475         return (PyObject *) op;
1476     }
1477     i = 0;
1478     if (i < size) {
1479         memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
1480         i = Py_SIZE(a);
1481     }
1482     while (i < size) {
1483         j = (i <= size-i)  ?  i  :  size-i;
1484         memcpy(op->ob_sval+i, op->ob_sval, j);
1485         i += j;
1486     }
1487     return (PyObject *) op;
1488 }
1489 
1490 static int
bytes_contains(PyObject * self,PyObject * arg)1491 bytes_contains(PyObject *self, PyObject *arg)
1492 {
1493     return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1494 }
1495 
1496 static PyObject *
bytes_item(PyBytesObject * a,Py_ssize_t i)1497 bytes_item(PyBytesObject *a, Py_ssize_t i)
1498 {
1499     if (i < 0 || i >= Py_SIZE(a)) {
1500         PyErr_SetString(PyExc_IndexError, "index out of range");
1501         return NULL;
1502     }
1503     return PyLong_FromLong((unsigned char)a->ob_sval[i]);
1504 }
1505 
1506 static int
bytes_compare_eq(PyBytesObject * a,PyBytesObject * b)1507 bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1508 {
1509     int cmp;
1510     Py_ssize_t len;
1511 
1512     len = Py_SIZE(a);
1513     if (Py_SIZE(b) != len)
1514         return 0;
1515 
1516     if (a->ob_sval[0] != b->ob_sval[0])
1517         return 0;
1518 
1519     cmp = memcmp(a->ob_sval, b->ob_sval, len);
1520     return (cmp == 0);
1521 }
1522 
1523 static PyObject*
bytes_richcompare(PyBytesObject * a,PyBytesObject * b,int op)1524 bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
1525 {
1526     int c;
1527     Py_ssize_t len_a, len_b;
1528     Py_ssize_t min_len;
1529 
1530     /* Make sure both arguments are strings. */
1531     if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1532         if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
1533             if (PyUnicode_Check(a) || PyUnicode_Check(b)) {
1534                 if (PyErr_WarnEx(PyExc_BytesWarning,
1535                                  "Comparison between bytes and string", 1))
1536                     return NULL;
1537             }
1538             if (PyLong_Check(a) || PyLong_Check(b)) {
1539                 if (PyErr_WarnEx(PyExc_BytesWarning,
1540                                  "Comparison between bytes and int", 1))
1541                     return NULL;
1542             }
1543         }
1544         Py_RETURN_NOTIMPLEMENTED;
1545     }
1546     else if (a == b) {
1547         switch (op) {
1548         case Py_EQ:
1549         case Py_LE:
1550         case Py_GE:
1551             /* a byte string is equal to itself */
1552             Py_RETURN_TRUE;
1553         case Py_NE:
1554         case Py_LT:
1555         case Py_GT:
1556             Py_RETURN_FALSE;
1557         default:
1558             PyErr_BadArgument();
1559             return NULL;
1560         }
1561     }
1562     else if (op == Py_EQ || op == Py_NE) {
1563         int eq = bytes_compare_eq(a, b);
1564         eq ^= (op == Py_NE);
1565         return PyBool_FromLong(eq);
1566     }
1567     else {
1568         len_a = Py_SIZE(a);
1569         len_b = Py_SIZE(b);
1570         min_len = Py_MIN(len_a, len_b);
1571         if (min_len > 0) {
1572             c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1573             if (c == 0)
1574                 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1575         }
1576         else
1577             c = 0;
1578         if (c != 0)
1579             Py_RETURN_RICHCOMPARE(c, 0, op);
1580         Py_RETURN_RICHCOMPARE(len_a, len_b, op);
1581     }
1582 }
1583 
1584 static Py_hash_t
bytes_hash(PyBytesObject * a)1585 bytes_hash(PyBytesObject *a)
1586 {
1587     if (a->ob_shash == -1) {
1588         /* Can't fail */
1589         a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
1590     }
1591     return a->ob_shash;
1592 }
1593 
1594 static PyObject*
bytes_subscript(PyBytesObject * self,PyObject * item)1595 bytes_subscript(PyBytesObject* self, PyObject* item)
1596 {
1597     if (_PyIndex_Check(item)) {
1598         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1599         if (i == -1 && PyErr_Occurred())
1600             return NULL;
1601         if (i < 0)
1602             i += PyBytes_GET_SIZE(self);
1603         if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1604             PyErr_SetString(PyExc_IndexError,
1605                             "index out of range");
1606             return NULL;
1607         }
1608         return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1609     }
1610     else if (PySlice_Check(item)) {
1611         Py_ssize_t start, stop, step, slicelength, i;
1612         size_t cur;
1613         const char* source_buf;
1614         char* result_buf;
1615         PyObject* result;
1616 
1617         if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1618             return NULL;
1619         }
1620         slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1621                                             &stop, step);
1622 
1623         if (slicelength <= 0) {
1624             return PyBytes_FromStringAndSize("", 0);
1625         }
1626         else if (start == 0 && step == 1 &&
1627                  slicelength == PyBytes_GET_SIZE(self) &&
1628                  PyBytes_CheckExact(self)) {
1629             Py_INCREF(self);
1630             return (PyObject *)self;
1631         }
1632         else if (step == 1) {
1633             return PyBytes_FromStringAndSize(
1634                 PyBytes_AS_STRING(self) + start,
1635                 slicelength);
1636         }
1637         else {
1638             source_buf = PyBytes_AS_STRING(self);
1639             result = PyBytes_FromStringAndSize(NULL, slicelength);
1640             if (result == NULL)
1641                 return NULL;
1642 
1643             result_buf = PyBytes_AS_STRING(result);
1644             for (cur = start, i = 0; i < slicelength;
1645                  cur += step, i++) {
1646                 result_buf[i] = source_buf[cur];
1647             }
1648 
1649             return result;
1650         }
1651     }
1652     else {
1653         PyErr_Format(PyExc_TypeError,
1654                      "byte indices must be integers or slices, not %.200s",
1655                      Py_TYPE(item)->tp_name);
1656         return NULL;
1657     }
1658 }
1659 
1660 static int
bytes_buffer_getbuffer(PyBytesObject * self,Py_buffer * view,int flags)1661 bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
1662 {
1663     return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1664                              1, flags);
1665 }
1666 
/* Sequence slot table for bytes.  The initializer is positional, so the
   order of the entries is significant; the trailing comment on each entry
   names the slot it fills.  Entries given as 0 leave that slot null. */
static PySequenceMethods bytes_as_sequence = {
    (lenfunc)bytes_length, /*sq_length*/
    (binaryfunc)bytes_concat, /*sq_concat*/
    (ssizeargfunc)bytes_repeat, /*sq_repeat*/
    (ssizeargfunc)bytes_item, /*sq_item*/
    0,                  /*sq_slice*/
    0,                  /*sq_ass_item*/
    0,                  /*sq_ass_slice*/
    (objobjproc)bytes_contains /*sq_contains*/
};
1677 
/* Mapping slot table for bytes (positional initializer): the length handler,
   the subscript handler (bytes_subscript accepts indices and slices), and a
   null third slot.
   NOTE(review): the third slot is presumably subscript assignment -- confirm
   against the PyMappingMethods declaration. */
static PyMappingMethods bytes_as_mapping = {
    (lenfunc)bytes_length,
    (binaryfunc)bytes_subscript,
    0,
};
1683 
/* Buffer slot table for bytes (positional initializer): the get-buffer
   handler followed by a NULL second slot.
   NOTE(review): the second slot is presumably the release-buffer handler --
   confirm against the PyBufferProcs declaration. */
static PyBufferProcs bytes_as_buffer = {
    (getbufferproc)bytes_buffer_getbuffer,
    NULL,
};
1688 
1689 
/* Strip modes, passed as the striptype argument of do_xstrip(): the leading
   end is stripped unless the mode is RIGHTSTRIP, and the trailing end is
   stripped unless the mode is LEFTSTRIP. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1693 
1694 /*[clinic input]
1695 bytes.split
1696 
1697     sep: object = None
1698         The delimiter according which to split the bytes.
1699         None (the default value) means split on ASCII whitespace characters
1700         (space, tab, return, newline, formfeed, vertical tab).
1701     maxsplit: Py_ssize_t = -1
1702         Maximum number of splits to do.
1703         -1 (the default value) means no limit.
1704 
1705 Return a list of the sections in the bytes, using sep as the delimiter.
1706 [clinic start generated code]*/
1707 
1708 static PyObject *
bytes_split_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1709 bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1710 /*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1711 {
1712     Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1713     const char *s = PyBytes_AS_STRING(self), *sub;
1714     Py_buffer vsub;
1715     PyObject *list;
1716 
1717     if (maxsplit < 0)
1718         maxsplit = PY_SSIZE_T_MAX;
1719     if (sep == Py_None)
1720         return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1721     if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1722         return NULL;
1723     sub = vsub.buf;
1724     n = vsub.len;
1725 
1726     list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1727     PyBuffer_Release(&vsub);
1728     return list;
1729 }
1730 
1731 /*[clinic input]
1732 bytes.partition
1733 
1734     sep: Py_buffer
1735     /
1736 
1737 Partition the bytes into three parts using the given separator.
1738 
1739 This will search for the separator sep in the bytes. If the separator is found,
1740 returns a 3-tuple containing the part before the separator, the separator
1741 itself, and the part after it.
1742 
1743 If the separator is not found, returns a 3-tuple containing the original bytes
1744 object and two empty bytes objects.
1745 [clinic start generated code]*/
1746 
1747 static PyObject *
bytes_partition_impl(PyBytesObject * self,Py_buffer * sep)1748 bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1749 /*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
1750 {
1751     return stringlib_partition(
1752         (PyObject*) self,
1753         PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1754         sep->obj, (const char *)sep->buf, sep->len
1755         );
1756 }
1757 
1758 /*[clinic input]
1759 bytes.rpartition
1760 
1761     sep: Py_buffer
1762     /
1763 
1764 Partition the bytes into three parts using the given separator.
1765 
1766 This will search for the separator sep in the bytes, starting at the end. If
1767 the separator is found, returns a 3-tuple containing the part before the
1768 separator, the separator itself, and the part after it.
1769 
1770 If the separator is not found, returns a 3-tuple containing two empty bytes
1771 objects and the original bytes object.
1772 [clinic start generated code]*/
1773 
1774 static PyObject *
bytes_rpartition_impl(PyBytesObject * self,Py_buffer * sep)1775 bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1776 /*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
1777 {
1778     return stringlib_rpartition(
1779         (PyObject*) self,
1780         PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1781         sep->obj, (const char *)sep->buf, sep->len
1782         );
1783 }
1784 
1785 /*[clinic input]
1786 bytes.rsplit = bytes.split
1787 
1788 Return a list of the sections in the bytes, using sep as the delimiter.
1789 
1790 Splitting is done starting at the end of the bytes and working to the front.
1791 [clinic start generated code]*/
1792 
1793 static PyObject *
bytes_rsplit_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1794 bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1795 /*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
1796 {
1797     Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1798     const char *s = PyBytes_AS_STRING(self), *sub;
1799     Py_buffer vsub;
1800     PyObject *list;
1801 
1802     if (maxsplit < 0)
1803         maxsplit = PY_SSIZE_T_MAX;
1804     if (sep == Py_None)
1805         return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1806     if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1807         return NULL;
1808     sub = vsub.buf;
1809     n = vsub.len;
1810 
1811     list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1812     PyBuffer_Release(&vsub);
1813     return list;
1814 }
1815 
1816 
1817 /*[clinic input]
1818 bytes.join
1819 
1820     iterable_of_bytes: object
1821     /
1822 
1823 Concatenate any number of bytes objects.
1824 
1825 The bytes whose method is called is inserted in between each pair.
1826 
1827 The result is returned as a new bytes object.
1828 
1829 Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1830 [clinic start generated code]*/
1831 
1832 static PyObject *
bytes_join(PyBytesObject * self,PyObject * iterable_of_bytes)1833 bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1834 /*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
1835 {
1836     return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1837 }
1838 
1839 PyObject *
_PyBytes_Join(PyObject * sep,PyObject * x)1840 _PyBytes_Join(PyObject *sep, PyObject *x)
1841 {
1842     assert(sep != NULL && PyBytes_Check(sep));
1843     assert(x != NULL);
1844     return bytes_join((PyBytesObject*)sep, x);
1845 }
1846 
1847 static PyObject *
bytes_find(PyBytesObject * self,PyObject * args)1848 bytes_find(PyBytesObject *self, PyObject *args)
1849 {
1850     return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1851 }
1852 
1853 static PyObject *
bytes_index(PyBytesObject * self,PyObject * args)1854 bytes_index(PyBytesObject *self, PyObject *args)
1855 {
1856     return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1857 }
1858 
1859 
1860 static PyObject *
bytes_rfind(PyBytesObject * self,PyObject * args)1861 bytes_rfind(PyBytesObject *self, PyObject *args)
1862 {
1863     return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1864 }
1865 
1866 
1867 static PyObject *
bytes_rindex(PyBytesObject * self,PyObject * args)1868 bytes_rindex(PyBytesObject *self, PyObject *args)
1869 {
1870     return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1871 }
1872 
1873 
1874 Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject * self,int striptype,PyObject * sepobj)1875 do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
1876 {
1877     Py_buffer vsep;
1878     const char *s = PyBytes_AS_STRING(self);
1879     Py_ssize_t len = PyBytes_GET_SIZE(self);
1880     char *sep;
1881     Py_ssize_t seplen;
1882     Py_ssize_t i, j;
1883 
1884     if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
1885         return NULL;
1886     sep = vsep.buf;
1887     seplen = vsep.len;
1888 
1889     i = 0;
1890     if (striptype != RIGHTSTRIP) {
1891         while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1892             i++;
1893         }
1894     }
1895 
1896     j = len;
1897     if (striptype != LEFTSTRIP) {
1898         do {
1899             j--;
1900         } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1901         j++;
1902     }
1903 
1904     PyBuffer_Release(&vsep);
1905 
1906     if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1907         Py_INCREF(self);
1908         return (PyObject*)self;
1909     }
1910     else
1911         return PyBytes_FromStringAndSize(s+i, j-i);
1912 }
1913 
1914 
1915 Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject * self,int striptype)1916 do_strip(PyBytesObject *self, int striptype)
1917 {
1918     const char *s = PyBytes_AS_STRING(self);
1919     Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1920 
1921     i = 0;
1922     if (striptype != RIGHTSTRIP) {
1923         while (i < len && Py_ISSPACE(s[i])) {
1924             i++;
1925         }
1926     }
1927 
1928     j = len;
1929     if (striptype != LEFTSTRIP) {
1930         do {
1931             j--;
1932         } while (j >= i && Py_ISSPACE(s[j]));
1933         j++;
1934     }
1935 
1936     if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1937         Py_INCREF(self);
1938         return (PyObject*)self;
1939     }
1940     else
1941         return PyBytes_FromStringAndSize(s+i, j-i);
1942 }
1943 
1944 
1945 Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject * self,int striptype,PyObject * bytes)1946 do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
1947 {
1948     if (bytes != Py_None) {
1949         return do_xstrip(self, striptype, bytes);
1950     }
1951     return do_strip(self, striptype);
1952 }
1953 
1954 /*[clinic input]
1955 bytes.strip
1956 
1957     bytes: object = None
1958     /
1959 
1960 Strip leading and trailing bytes contained in the argument.
1961 
1962 If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1963 [clinic start generated code]*/
1964 
1965 static PyObject *
bytes_strip_impl(PyBytesObject * self,PyObject * bytes)1966 bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
1967 /*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
1968 {
1969     return do_argstrip(self, BOTHSTRIP, bytes);
1970 }
1971 
1972 /*[clinic input]
1973 bytes.lstrip
1974 
1975     bytes: object = None
1976     /
1977 
1978 Strip leading bytes contained in the argument.
1979 
If the argument is omitted or None, strip leading ASCII whitespace.
1981 [clinic start generated code]*/
1982 
1983 static PyObject *
bytes_lstrip_impl(PyBytesObject * self,PyObject * bytes)1984 bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
1985 /*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
1986 {
1987     return do_argstrip(self, LEFTSTRIP, bytes);
1988 }
1989 
1990 /*[clinic input]
1991 bytes.rstrip
1992 
1993     bytes: object = None
1994     /
1995 
1996 Strip trailing bytes contained in the argument.
1997 
1998 If the argument is omitted or None, strip trailing ASCII whitespace.
1999 [clinic start generated code]*/
2000 
2001 static PyObject *
bytes_rstrip_impl(PyBytesObject * self,PyObject * bytes)2002 bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2003 /*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2004 {
2005     return do_argstrip(self, RIGHTSTRIP, bytes);
2006 }
2007 
2008 
2009 static PyObject *
bytes_count(PyBytesObject * self,PyObject * args)2010 bytes_count(PyBytesObject *self, PyObject *args)
2011 {
2012     return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2013 }
2014 
2015 
2016 /*[clinic input]
2017 bytes.translate
2018 
2019     table: object
2020         Translation table, which must be a bytes object of length 256.
2021     /
2022     delete as deletechars: object(c_default="NULL") = b''
2023 
2024 Return a copy with each character mapped by the given translation table.
2025 
2026 All characters occurring in the optional argument delete are removed.
2027 The remaining characters are mapped through the given translation table.
2028 [clinic start generated code]*/
2029 
2030 static PyObject *
bytes_translate_impl(PyBytesObject * self,PyObject * table,PyObject * deletechars)2031 bytes_translate_impl(PyBytesObject *self, PyObject *table,
2032                      PyObject *deletechars)
2033 /*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
2034 {
2035     const char *input;
2036     char *output;
2037     Py_buffer table_view = {NULL, NULL};
2038     Py_buffer del_table_view = {NULL, NULL};
2039     const char *table_chars;
2040     Py_ssize_t i, c, changed = 0;
2041     PyObject *input_obj = (PyObject*)self;
2042     const char *output_start, *del_table_chars=NULL;
2043     Py_ssize_t inlen, tablen, dellen = 0;
2044     PyObject *result;
2045     int trans_table[256];
2046 
2047     if (PyBytes_Check(table)) {
2048         table_chars = PyBytes_AS_STRING(table);
2049         tablen = PyBytes_GET_SIZE(table);
2050     }
2051     else if (table == Py_None) {
2052         table_chars = NULL;
2053         tablen = 256;
2054     }
2055     else {
2056         if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
2057             return NULL;
2058         table_chars = table_view.buf;
2059         tablen = table_view.len;
2060     }
2061 
2062     if (tablen != 256) {
2063         PyErr_SetString(PyExc_ValueError,
2064           "translation table must be 256 characters long");
2065         PyBuffer_Release(&table_view);
2066         return NULL;
2067     }
2068 
2069     if (deletechars != NULL) {
2070         if (PyBytes_Check(deletechars)) {
2071             del_table_chars = PyBytes_AS_STRING(deletechars);
2072             dellen = PyBytes_GET_SIZE(deletechars);
2073         }
2074         else {
2075             if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
2076                 PyBuffer_Release(&table_view);
2077                 return NULL;
2078             }
2079             del_table_chars = del_table_view.buf;
2080             dellen = del_table_view.len;
2081         }
2082     }
2083     else {
2084         del_table_chars = NULL;
2085         dellen = 0;
2086     }
2087 
2088     inlen = PyBytes_GET_SIZE(input_obj);
2089     result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2090     if (result == NULL) {
2091         PyBuffer_Release(&del_table_view);
2092         PyBuffer_Release(&table_view);
2093         return NULL;
2094     }
2095     output_start = output = PyBytes_AS_STRING(result);
2096     input = PyBytes_AS_STRING(input_obj);
2097 
2098     if (dellen == 0 && table_chars != NULL) {
2099         /* If no deletions are required, use faster code */
2100         for (i = inlen; --i >= 0; ) {
2101             c = Py_CHARMASK(*input++);
2102             if (Py_CHARMASK((*output++ = table_chars[c])) != c)
2103                 changed = 1;
2104         }
2105         if (!changed && PyBytes_CheckExact(input_obj)) {
2106             Py_INCREF(input_obj);
2107             Py_DECREF(result);
2108             result = input_obj;
2109         }
2110         PyBuffer_Release(&del_table_view);
2111         PyBuffer_Release(&table_view);
2112         return result;
2113     }
2114 
2115     if (table_chars == NULL) {
2116         for (i = 0; i < 256; i++)
2117             trans_table[i] = Py_CHARMASK(i);
2118     } else {
2119         for (i = 0; i < 256; i++)
2120             trans_table[i] = Py_CHARMASK(table_chars[i]);
2121     }
2122     PyBuffer_Release(&table_view);
2123 
2124     for (i = 0; i < dellen; i++)
2125         trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
2126     PyBuffer_Release(&del_table_view);
2127 
2128     for (i = inlen; --i >= 0; ) {
2129         c = Py_CHARMASK(*input++);
2130         if (trans_table[c] != -1)
2131             if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2132                 continue;
2133         changed = 1;
2134     }
2135     if (!changed && PyBytes_CheckExact(input_obj)) {
2136         Py_DECREF(result);
2137         Py_INCREF(input_obj);
2138         return input_obj;
2139     }
2140     /* Fix the size of the resulting byte string */
2141     if (inlen > 0)
2142         _PyBytes_Resize(&result, output - output_start);
2143     return result;
2144 }
2145 
2146 
2147 /*[clinic input]
2148 
2149 @staticmethod
2150 bytes.maketrans
2151 
2152     frm: Py_buffer
2153     to: Py_buffer
2154     /
2155 
2156 Return a translation table useable for the bytes or bytearray translate method.
2157 
2158 The returned table will be one where each byte in frm is mapped to the byte at
2159 the same position in to.
2160 
2161 The bytes objects frm and to must be of the same length.
2162 [clinic start generated code]*/
2163 
2164 static PyObject *
bytes_maketrans_impl(Py_buffer * frm,Py_buffer * to)2165 bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2166 /*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
2167 {
2168     return _Py_bytes_maketrans(frm, to);
2169 }
2170 
2171 
2172 /*[clinic input]
2173 bytes.replace
2174 
2175     old: Py_buffer
2176     new: Py_buffer
2177     count: Py_ssize_t = -1
2178         Maximum number of occurrences to replace.
2179         -1 (the default value) means replace all occurrences.
2180     /
2181 
2182 Return a copy with all occurrences of substring old replaced by new.
2183 
2184 If the optional argument count is given, only the first count occurrences are
2185 replaced.
2186 [clinic start generated code]*/
2187 
2188 static PyObject *
bytes_replace_impl(PyBytesObject * self,Py_buffer * old,Py_buffer * new,Py_ssize_t count)2189 bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2190                    Py_ssize_t count)
2191 /*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
2192 {
2193     return stringlib_replace((PyObject *)self,
2194                              (const char *)old->buf, old->len,
2195                              (const char *)new->buf, new->len, count);
2196 }
2197 
2198 /** End DALKE **/
2199 
2200 /*[clinic input]
2201 bytes.removeprefix as bytes_removeprefix
2202 
2203     prefix: Py_buffer
2204     /
2205 
2206 Return a bytes object with the given prefix string removed if present.
2207 
2208 If the bytes starts with the prefix string, return bytes[len(prefix):].
2209 Otherwise, return a copy of the original bytes.
2210 [clinic start generated code]*/
2211 
2212 static PyObject *
bytes_removeprefix_impl(PyBytesObject * self,Py_buffer * prefix)2213 bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2214 /*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2215 {
2216     const char *self_start = PyBytes_AS_STRING(self);
2217     Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2218     const char *prefix_start = prefix->buf;
2219     Py_ssize_t prefix_len = prefix->len;
2220 
2221     if (self_len >= prefix_len
2222         && prefix_len > 0
2223         && memcmp(self_start, prefix_start, prefix_len) == 0)
2224     {
2225         return PyBytes_FromStringAndSize(self_start + prefix_len,
2226                                          self_len - prefix_len);
2227     }
2228 
2229     if (PyBytes_CheckExact(self)) {
2230         Py_INCREF(self);
2231         return (PyObject *)self;
2232     }
2233 
2234     return PyBytes_FromStringAndSize(self_start, self_len);
2235 }
2236 
2237 /*[clinic input]
2238 bytes.removesuffix as bytes_removesuffix
2239 
2240     suffix: Py_buffer
2241     /
2242 
2243 Return a bytes object with the given suffix string removed if present.
2244 
2245 If the bytes ends with the suffix string and that suffix is not empty,
return bytes[:-len(suffix)].  Otherwise, return a copy of the original
2247 bytes.
2248 [clinic start generated code]*/
2249 
2250 static PyObject *
bytes_removesuffix_impl(PyBytesObject * self,Py_buffer * suffix)2251 bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2252 /*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2253 {
2254     const char *self_start = PyBytes_AS_STRING(self);
2255     Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2256     const char *suffix_start = suffix->buf;
2257     Py_ssize_t suffix_len = suffix->len;
2258 
2259     if (self_len >= suffix_len
2260         && suffix_len > 0
2261         && memcmp(self_start + self_len - suffix_len,
2262                   suffix_start, suffix_len) == 0)
2263     {
2264         return PyBytes_FromStringAndSize(self_start,
2265                                          self_len - suffix_len);
2266     }
2267 
2268     if (PyBytes_CheckExact(self)) {
2269         Py_INCREF(self);
2270         return (PyObject *)self;
2271     }
2272 
2273     return PyBytes_FromStringAndSize(self_start, self_len);
2274 }
2275 
2276 static PyObject *
bytes_startswith(PyBytesObject * self,PyObject * args)2277 bytes_startswith(PyBytesObject *self, PyObject *args)
2278 {
2279     return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2280 }
2281 
2282 static PyObject *
bytes_endswith(PyBytesObject * self,PyObject * args)2283 bytes_endswith(PyBytesObject *self, PyObject *args)
2284 {
2285     return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2286 }
2287 
2288 
2289 /*[clinic input]
2290 bytes.decode
2291 
2292     encoding: str(c_default="NULL") = 'utf-8'
2293         The encoding with which to decode the bytes.
2294     errors: str(c_default="NULL") = 'strict'
2295         The error handling scheme to use for the handling of decoding errors.
2296         The default is 'strict' meaning that decoding errors raise a
2297         UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2298         as well as any other name registered with codecs.register_error that
2299         can handle UnicodeDecodeErrors.
2300 
2301 Decode the bytes using the codec registered for encoding.
2302 [clinic start generated code]*/
2303 
2304 static PyObject *
bytes_decode_impl(PyBytesObject * self,const char * encoding,const char * errors)2305 bytes_decode_impl(PyBytesObject *self, const char *encoding,
2306                   const char *errors)
2307 /*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2308 {
2309     return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2310 }
2311 
2312 
2313 /*[clinic input]
2314 bytes.splitlines
2315 
2316     keepends: bool(accept={int}) = False
2317 
2318 Return a list of the lines in the bytes, breaking at line boundaries.
2319 
2320 Line breaks are not included in the resulting list unless keepends is given and
2321 true.
2322 [clinic start generated code]*/
2323 
2324 static PyObject *
bytes_splitlines_impl(PyBytesObject * self,int keepends)2325 bytes_splitlines_impl(PyBytesObject *self, int keepends)
2326 /*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
2327 {
2328     return stringlib_splitlines(
2329         (PyObject*) self, PyBytes_AS_STRING(self),
2330         PyBytes_GET_SIZE(self), keepends
2331         );
2332 }
2333 
2334 /*[clinic input]
2335 @classmethod
2336 bytes.fromhex
2337 
2338     string: unicode
2339     /
2340 
2341 Create a bytes object from a string of hexadecimal numbers.
2342 
2343 Spaces between two numbers are accepted.
2344 Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2345 [clinic start generated code]*/
2346 
2347 static PyObject *
bytes_fromhex_impl(PyTypeObject * type,PyObject * string)2348 bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2349 /*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
2350 {
2351     PyObject *result = _PyBytes_FromHex(string, 0);
2352     if (type != &PyBytes_Type && result != NULL) {
2353         Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
2354     }
2355     return result;
2356 }
2357 
2358 PyObject*
_PyBytes_FromHex(PyObject * string,int use_bytearray)2359 _PyBytes_FromHex(PyObject *string, int use_bytearray)
2360 {
2361     char *buf;
2362     Py_ssize_t hexlen, invalid_char;
2363     unsigned int top, bot;
2364     const Py_UCS1 *str, *end;
2365     _PyBytesWriter writer;
2366 
2367     _PyBytesWriter_Init(&writer);
2368     writer.use_bytearray = use_bytearray;
2369 
2370     assert(PyUnicode_Check(string));
2371     if (PyUnicode_READY(string))
2372         return NULL;
2373     hexlen = PyUnicode_GET_LENGTH(string);
2374 
2375     if (!PyUnicode_IS_ASCII(string)) {
2376         const void *data = PyUnicode_DATA(string);
2377         unsigned int kind = PyUnicode_KIND(string);
2378         Py_ssize_t i;
2379 
2380         /* search for the first non-ASCII character */
2381         for (i = 0; i < hexlen; i++) {
2382             if (PyUnicode_READ(kind, data, i) >= 128)
2383                 break;
2384         }
2385         invalid_char = i;
2386         goto error;
2387     }
2388 
2389     assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2390     str = PyUnicode_1BYTE_DATA(string);
2391 
2392     /* This overestimates if there are spaces */
2393     buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2394     if (buf == NULL)
2395         return NULL;
2396 
2397     end = str + hexlen;
2398     while (str < end) {
2399         /* skip over spaces in the input */
2400         if (Py_ISSPACE(*str)) {
2401             do {
2402                 str++;
2403             } while (Py_ISSPACE(*str));
2404             if (str >= end)
2405                 break;
2406         }
2407 
2408         top = _PyLong_DigitValue[*str];
2409         if (top >= 16) {
2410             invalid_char = str - PyUnicode_1BYTE_DATA(string);
2411             goto error;
2412         }
2413         str++;
2414 
2415         bot = _PyLong_DigitValue[*str];
2416         if (bot >= 16) {
2417             invalid_char = str - PyUnicode_1BYTE_DATA(string);
2418             goto error;
2419         }
2420         str++;
2421 
2422         *buf++ = (unsigned char)((top << 4) + bot);
2423     }
2424 
2425     return _PyBytesWriter_Finish(&writer, buf);
2426 
2427   error:
2428     PyErr_Format(PyExc_ValueError,
2429                  "non-hexadecimal number found in "
2430                  "fromhex() arg at position %zd", invalid_char);
2431     _PyBytesWriter_Dealloc(&writer);
2432     return NULL;
2433 }
2434 
2435 /*[clinic input]
2436 bytes.hex
2437 
2438     sep: object = NULL
2439         An optional single character or byte to separate hex bytes.
2440     bytes_per_sep: int = 1
2441         How many bytes between separators.  Positive values count from the
2442         right, negative values count from the left.
2443 
2444 Create a string of hexadecimal numbers from a bytes object.
2445 
2446 Example:
2447 >>> value = b'\xb9\x01\xef'
2448 >>> value.hex()
2449 'b901ef'
2450 >>> value.hex(':')
2451 'b9:01:ef'
2452 >>> value.hex(':', 2)
2453 'b9:01ef'
2454 >>> value.hex(':', -2)
2455 'b901:ef'
2456 [clinic start generated code]*/
2457 
2458 static PyObject *
bytes_hex_impl(PyBytesObject * self,PyObject * sep,int bytes_per_sep)2459 bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2460 /*[clinic end generated code: output=1f134da504064139 input=1a21282b1f1ae595]*/
2461 {
2462     const char *argbuf = PyBytes_AS_STRING(self);
2463     Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2464     return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
2465 }
2466 
2467 static PyObject *
bytes_getnewargs(PyBytesObject * v,PyObject * Py_UNUSED (ignored))2468 bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
2469 {
2470     return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
2471 }
2472 
2473 
2474 static PyMethodDef
2475 bytes_methods[] = {
2476     {"__getnewargs__",          (PyCFunction)bytes_getnewargs,  METH_NOARGS},
2477     {"capitalize", stringlib_capitalize, METH_NOARGS,
2478      _Py_capitalize__doc__},
2479     STRINGLIB_CENTER_METHODDEF
2480     {"count", (PyCFunction)bytes_count, METH_VARARGS,
2481      _Py_count__doc__},
2482     BYTES_DECODE_METHODDEF
2483     {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2484      _Py_endswith__doc__},
2485     STRINGLIB_EXPANDTABS_METHODDEF
2486     {"find", (PyCFunction)bytes_find, METH_VARARGS,
2487      _Py_find__doc__},
2488     BYTES_FROMHEX_METHODDEF
2489     BYTES_HEX_METHODDEF
2490     {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
2491     {"isalnum", stringlib_isalnum, METH_NOARGS,
2492      _Py_isalnum__doc__},
2493     {"isalpha", stringlib_isalpha, METH_NOARGS,
2494      _Py_isalpha__doc__},
2495     {"isascii", stringlib_isascii, METH_NOARGS,
2496      _Py_isascii__doc__},
2497     {"isdigit", stringlib_isdigit, METH_NOARGS,
2498      _Py_isdigit__doc__},
2499     {"islower", stringlib_islower, METH_NOARGS,
2500      _Py_islower__doc__},
2501     {"isspace", stringlib_isspace, METH_NOARGS,
2502      _Py_isspace__doc__},
2503     {"istitle", stringlib_istitle, METH_NOARGS,
2504      _Py_istitle__doc__},
2505     {"isupper", stringlib_isupper, METH_NOARGS,
2506      _Py_isupper__doc__},
2507     BYTES_JOIN_METHODDEF
2508     STRINGLIB_LJUST_METHODDEF
2509     {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2510     BYTES_LSTRIP_METHODDEF
2511     BYTES_MAKETRANS_METHODDEF
2512     BYTES_PARTITION_METHODDEF
2513     BYTES_REPLACE_METHODDEF
2514     BYTES_REMOVEPREFIX_METHODDEF
2515     BYTES_REMOVESUFFIX_METHODDEF
2516     {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2517     {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
2518     STRINGLIB_RJUST_METHODDEF
2519     BYTES_RPARTITION_METHODDEF
2520     BYTES_RSPLIT_METHODDEF
2521     BYTES_RSTRIP_METHODDEF
2522     BYTES_SPLIT_METHODDEF
2523     BYTES_SPLITLINES_METHODDEF
2524     {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2525      _Py_startswith__doc__},
2526     BYTES_STRIP_METHODDEF
2527     {"swapcase", stringlib_swapcase, METH_NOARGS,
2528      _Py_swapcase__doc__},
2529     {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
2530     BYTES_TRANSLATE_METHODDEF
2531     {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2532     STRINGLIB_ZFILL_METHODDEF
2533     {NULL,     NULL}                         /* sentinel */
2534 };
2535 
2536 static PyObject *
bytes_mod(PyObject * self,PyObject * arg)2537 bytes_mod(PyObject *self, PyObject *arg)
2538 {
2539     if (!PyBytes_Check(self)) {
2540         Py_RETURN_NOTIMPLEMENTED;
2541     }
2542     return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2543                              arg, 0);
2544 }
2545 
2546 static PyNumberMethods bytes_as_number = {
2547     0,              /*nb_add*/
2548     0,              /*nb_subtract*/
2549     0,              /*nb_multiply*/
2550     bytes_mod,      /*nb_remainder*/
2551 };
2552 
2553 static PyObject *
2554 bytes_subtype_new(PyTypeObject *, PyObject *);
2555 
2556 /*[clinic input]
2557 @classmethod
2558 bytes.__new__ as bytes_new
2559 
2560     source as x: object = NULL
2561     encoding: str = NULL
2562     errors: str = NULL
2563 
2564 [clinic start generated code]*/
2565 
2566 static PyObject *
bytes_new_impl(PyTypeObject * type,PyObject * x,const char * encoding,const char * errors)2567 bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
2568                const char *errors)
2569 /*[clinic end generated code: output=1e0c471be311a425 input=f0a966d19b7262b4]*/
2570 {
2571     PyObject *bytes;
2572     PyObject *func;
2573     Py_ssize_t size;
2574 
2575     if (x == NULL) {
2576         if (encoding != NULL || errors != NULL) {
2577             PyErr_SetString(PyExc_TypeError,
2578                             encoding != NULL ?
2579                             "encoding without a string argument" :
2580                             "errors without a string argument");
2581             return NULL;
2582         }
2583         bytes = PyBytes_FromStringAndSize(NULL, 0);
2584     }
2585     else if (encoding != NULL) {
2586         /* Encode via the codec registry */
2587         if (!PyUnicode_Check(x)) {
2588             PyErr_SetString(PyExc_TypeError,
2589                             "encoding without a string argument");
2590             return NULL;
2591         }
2592         bytes = PyUnicode_AsEncodedString(x, encoding, errors);
2593     }
2594     else if (errors != NULL) {
2595         PyErr_SetString(PyExc_TypeError,
2596                         PyUnicode_Check(x) ?
2597                         "string argument without an encoding" :
2598                         "errors without a string argument");
2599         return NULL;
2600     }
2601     /* We'd like to call PyObject_Bytes here, but we need to check for an
2602        integer argument before deferring to PyBytes_FromObject, something
2603        PyObject_Bytes doesn't do. */
2604     else if ((func = _PyObject_LookupSpecial(x, &PyId___bytes__)) != NULL) {
2605         bytes = _PyObject_CallNoArg(func);
2606         Py_DECREF(func);
2607         if (bytes == NULL)
2608             return NULL;
2609         if (!PyBytes_Check(bytes)) {
2610             PyErr_Format(PyExc_TypeError,
2611                         "__bytes__ returned non-bytes (type %.200s)",
2612                         Py_TYPE(bytes)->tp_name);
2613             Py_DECREF(bytes);
2614             return NULL;
2615         }
2616     }
2617     else if (PyErr_Occurred())
2618         return NULL;
2619     else if (PyUnicode_Check(x)) {
2620         PyErr_SetString(PyExc_TypeError,
2621                         "string argument without an encoding");
2622         return NULL;
2623     }
2624     /* Is it an integer? */
2625     else if (_PyIndex_Check(x)) {
2626         size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2627         if (size == -1 && PyErr_Occurred()) {
2628             if (!PyErr_ExceptionMatches(PyExc_TypeError))
2629                 return NULL;
2630             PyErr_Clear();  /* fall through */
2631             bytes = PyBytes_FromObject(x);
2632         }
2633         else {
2634             if (size < 0) {
2635                 PyErr_SetString(PyExc_ValueError, "negative count");
2636                 return NULL;
2637             }
2638             bytes = _PyBytes_FromSize(size, 1);
2639         }
2640     }
2641     else {
2642         bytes = PyBytes_FromObject(x);
2643     }
2644 
2645     if (bytes != NULL && type != &PyBytes_Type) {
2646         Py_SETREF(bytes, bytes_subtype_new(type, bytes));
2647     }
2648 
2649     return bytes;
2650 }
2651 
2652 static PyObject*
_PyBytes_FromBuffer(PyObject * x)2653 _PyBytes_FromBuffer(PyObject *x)
2654 {
2655     PyObject *new;
2656     Py_buffer view;
2657 
2658     if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2659         return NULL;
2660 
2661     new = PyBytes_FromStringAndSize(NULL, view.len);
2662     if (!new)
2663         goto fail;
2664     if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2665                 &view, view.len, 'C') < 0)
2666         goto fail;
2667     PyBuffer_Release(&view);
2668     return new;
2669 
2670 fail:
2671     Py_XDECREF(new);
2672     PyBuffer_Release(&view);
2673     return NULL;
2674 }
2675 
2676 static PyObject*
_PyBytes_FromList(PyObject * x)2677 _PyBytes_FromList(PyObject *x)
2678 {
2679     Py_ssize_t i, size = PyList_GET_SIZE(x);
2680     Py_ssize_t value;
2681     char *str;
2682     PyObject *item;
2683     _PyBytesWriter writer;
2684 
2685     _PyBytesWriter_Init(&writer);
2686     str = _PyBytesWriter_Alloc(&writer, size);
2687     if (str == NULL)
2688         return NULL;
2689     writer.overallocate = 1;
2690     size = writer.allocated;
2691 
2692     for (i = 0; i < PyList_GET_SIZE(x); i++) {
2693         item = PyList_GET_ITEM(x, i);
2694         Py_INCREF(item);
2695         value = PyNumber_AsSsize_t(item, NULL);
2696         Py_DECREF(item);
2697         if (value == -1 && PyErr_Occurred())
2698             goto error;
2699 
2700         if (value < 0 || value >= 256) {
2701             PyErr_SetString(PyExc_ValueError,
2702                             "bytes must be in range(0, 256)");
2703             goto error;
2704         }
2705 
2706         if (i >= size) {
2707             str = _PyBytesWriter_Resize(&writer, str, size+1);
2708             if (str == NULL)
2709                 return NULL;
2710             size = writer.allocated;
2711         }
2712         *str++ = (char) value;
2713     }
2714     return _PyBytesWriter_Finish(&writer, str);
2715 
2716   error:
2717     _PyBytesWriter_Dealloc(&writer);
2718     return NULL;
2719 }
2720 
2721 static PyObject*
_PyBytes_FromTuple(PyObject * x)2722 _PyBytes_FromTuple(PyObject *x)
2723 {
2724     PyObject *bytes;
2725     Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2726     Py_ssize_t value;
2727     char *str;
2728     PyObject *item;
2729 
2730     bytes = PyBytes_FromStringAndSize(NULL, size);
2731     if (bytes == NULL)
2732         return NULL;
2733     str = ((PyBytesObject *)bytes)->ob_sval;
2734 
2735     for (i = 0; i < size; i++) {
2736         item = PyTuple_GET_ITEM(x, i);
2737         value = PyNumber_AsSsize_t(item, NULL);
2738         if (value == -1 && PyErr_Occurred())
2739             goto error;
2740 
2741         if (value < 0 || value >= 256) {
2742             PyErr_SetString(PyExc_ValueError,
2743                             "bytes must be in range(0, 256)");
2744             goto error;
2745         }
2746         *str++ = (char) value;
2747     }
2748     return bytes;
2749 
2750   error:
2751     Py_DECREF(bytes);
2752     return NULL;
2753 }
2754 
2755 static PyObject *
_PyBytes_FromIterator(PyObject * it,PyObject * x)2756 _PyBytes_FromIterator(PyObject *it, PyObject *x)
2757 {
2758     char *str;
2759     Py_ssize_t i, size;
2760     _PyBytesWriter writer;
2761 
2762     /* For iterator version, create a bytes object and resize as needed */
2763     size = PyObject_LengthHint(x, 64);
2764     if (size == -1 && PyErr_Occurred())
2765         return NULL;
2766 
2767     _PyBytesWriter_Init(&writer);
2768     str = _PyBytesWriter_Alloc(&writer, size);
2769     if (str == NULL)
2770         return NULL;
2771     writer.overallocate = 1;
2772     size = writer.allocated;
2773 
2774     /* Run the iterator to exhaustion */
2775     for (i = 0; ; i++) {
2776         PyObject *item;
2777         Py_ssize_t value;
2778 
2779         /* Get the next item */
2780         item = PyIter_Next(it);
2781         if (item == NULL) {
2782             if (PyErr_Occurred())
2783                 goto error;
2784             break;
2785         }
2786 
2787         /* Interpret it as an int (__index__) */
2788         value = PyNumber_AsSsize_t(item, NULL);
2789         Py_DECREF(item);
2790         if (value == -1 && PyErr_Occurred())
2791             goto error;
2792 
2793         /* Range check */
2794         if (value < 0 || value >= 256) {
2795             PyErr_SetString(PyExc_ValueError,
2796                             "bytes must be in range(0, 256)");
2797             goto error;
2798         }
2799 
2800         /* Append the byte */
2801         if (i >= size) {
2802             str = _PyBytesWriter_Resize(&writer, str, size+1);
2803             if (str == NULL)
2804                 return NULL;
2805             size = writer.allocated;
2806         }
2807         *str++ = (char) value;
2808     }
2809 
2810     return _PyBytesWriter_Finish(&writer, str);
2811 
2812   error:
2813     _PyBytesWriter_Dealloc(&writer);
2814     return NULL;
2815 }
2816 
2817 PyObject *
PyBytes_FromObject(PyObject * x)2818 PyBytes_FromObject(PyObject *x)
2819 {
2820     PyObject *it, *result;
2821 
2822     if (x == NULL) {
2823         PyErr_BadInternalCall();
2824         return NULL;
2825     }
2826 
2827     if (PyBytes_CheckExact(x)) {
2828         Py_INCREF(x);
2829         return x;
2830     }
2831 
2832     /* Use the modern buffer interface */
2833     if (PyObject_CheckBuffer(x))
2834         return _PyBytes_FromBuffer(x);
2835 
2836     if (PyList_CheckExact(x))
2837         return _PyBytes_FromList(x);
2838 
2839     if (PyTuple_CheckExact(x))
2840         return _PyBytes_FromTuple(x);
2841 
2842     if (!PyUnicode_Check(x)) {
2843         it = PyObject_GetIter(x);
2844         if (it != NULL) {
2845             result = _PyBytes_FromIterator(it, x);
2846             Py_DECREF(it);
2847             return result;
2848         }
2849         if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2850             return NULL;
2851         }
2852     }
2853 
2854     PyErr_Format(PyExc_TypeError,
2855                  "cannot convert '%.200s' object to bytes",
2856                  Py_TYPE(x)->tp_name);
2857     return NULL;
2858 }
2859 
2860 static PyObject *
bytes_subtype_new(PyTypeObject * type,PyObject * tmp)2861 bytes_subtype_new(PyTypeObject *type, PyObject *tmp)
2862 {
2863     PyObject *pnew;
2864     Py_ssize_t n;
2865 
2866     assert(PyType_IsSubtype(type, &PyBytes_Type));
2867     assert(PyBytes_Check(tmp));
2868     n = PyBytes_GET_SIZE(tmp);
2869     pnew = type->tp_alloc(type, n);
2870     if (pnew != NULL) {
2871         memcpy(PyBytes_AS_STRING(pnew),
2872                   PyBytes_AS_STRING(tmp), n+1);
2873         ((PyBytesObject *)pnew)->ob_shash =
2874             ((PyBytesObject *)tmp)->ob_shash;
2875     }
2876     return pnew;
2877 }
2878 
2879 PyDoc_STRVAR(bytes_doc,
2880 "bytes(iterable_of_ints) -> bytes\n\
2881 bytes(string, encoding[, errors]) -> bytes\n\
2882 bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
2883 bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2884 bytes() -> empty bytes object\n\
2885 \n\
2886 Construct an immutable array of bytes from:\n\
2887   - an iterable yielding integers in range(256)\n\
2888   - a text string encoded using the specified encoding\n\
2889   - any object implementing the buffer API.\n\
2890   - an integer");
2891 
2892 static PyObject *bytes_iter(PyObject *seq);
2893 
2894 PyTypeObject PyBytes_Type = {
2895     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2896     "bytes",
2897     PyBytesObject_SIZE,
2898     sizeof(char),
2899     0,                                          /* tp_dealloc */
2900     0,                                          /* tp_vectorcall_offset */
2901     0,                                          /* tp_getattr */
2902     0,                                          /* tp_setattr */
2903     0,                                          /* tp_as_async */
2904     (reprfunc)bytes_repr,                       /* tp_repr */
2905     &bytes_as_number,                           /* tp_as_number */
2906     &bytes_as_sequence,                         /* tp_as_sequence */
2907     &bytes_as_mapping,                          /* tp_as_mapping */
2908     (hashfunc)bytes_hash,                       /* tp_hash */
2909     0,                                          /* tp_call */
2910     bytes_str,                                  /* tp_str */
2911     PyObject_GenericGetAttr,                    /* tp_getattro */
2912     0,                                          /* tp_setattro */
2913     &bytes_as_buffer,                           /* tp_as_buffer */
2914     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2915         Py_TPFLAGS_BYTES_SUBCLASS |
2916         _Py_TPFLAGS_MATCH_SELF,               /* tp_flags */
2917     bytes_doc,                                  /* tp_doc */
2918     0,                                          /* tp_traverse */
2919     0,                                          /* tp_clear */
2920     (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
2921     0,                                          /* tp_weaklistoffset */
2922     bytes_iter,                                 /* tp_iter */
2923     0,                                          /* tp_iternext */
2924     bytes_methods,                              /* tp_methods */
2925     0,                                          /* tp_members */
2926     0,                                          /* tp_getset */
2927     &PyBaseObject_Type,                         /* tp_base */
2928     0,                                          /* tp_dict */
2929     0,                                          /* tp_descr_get */
2930     0,                                          /* tp_descr_set */
2931     0,                                          /* tp_dictoffset */
2932     0,                                          /* tp_init */
2933     0,                                          /* tp_alloc */
2934     bytes_new,                                  /* tp_new */
2935     PyObject_Del,                               /* tp_free */
2936 };
2937 
2938 void
PyBytes_Concat(PyObject ** pv,PyObject * w)2939 PyBytes_Concat(PyObject **pv, PyObject *w)
2940 {
2941     assert(pv != NULL);
2942     if (*pv == NULL)
2943         return;
2944     if (w == NULL) {
2945         Py_CLEAR(*pv);
2946         return;
2947     }
2948 
2949     if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2950         /* Only one reference, so we can resize in place */
2951         Py_ssize_t oldsize;
2952         Py_buffer wb;
2953 
2954         if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
2955             PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2956                          Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2957             Py_CLEAR(*pv);
2958             return;
2959         }
2960 
2961         oldsize = PyBytes_GET_SIZE(*pv);
2962         if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2963             PyErr_NoMemory();
2964             goto error;
2965         }
2966         if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2967             goto error;
2968 
2969         memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2970         PyBuffer_Release(&wb);
2971         return;
2972 
2973       error:
2974         PyBuffer_Release(&wb);
2975         Py_CLEAR(*pv);
2976         return;
2977     }
2978 
2979     else {
2980         /* Multiple references, need to create new object */
2981         PyObject *v;
2982         v = bytes_concat(*pv, w);
2983         Py_SETREF(*pv, v);
2984     }
2985 }
2986 
2987 void
PyBytes_ConcatAndDel(PyObject ** pv,PyObject * w)2988 PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
2989 {
2990     PyBytes_Concat(pv, w);
2991     Py_XDECREF(w);
2992 }
2993 
2994 
2995 /* The following function breaks the notion that bytes are immutable:
2996    it changes the size of a bytes object.  We get away with this only if there
2997    is only one module referencing the object.  You can also think of it
2998    as creating a new bytes object and destroying the old one, only
2999    more efficiently.  In any case, don't use this if the bytes object may
3000    already be known to some other part of the code...
3001    Note that if there's not enough memory to resize the bytes object, the
3002    original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
3003    memory" exception is set, and -1 is returned.  Else (on success) 0 is
3004    returned, and the value in *pv may or may not be the same as on input.
3005    As always, an extra byte is allocated for a trailing \0 byte (newsize
3006    does *not* include that), and a trailing \0 byte is stored.
3007 */
3008 
3009 int
_PyBytes_Resize(PyObject ** pv,Py_ssize_t newsize)3010 _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3011 {
3012     PyObject *v;
3013     PyBytesObject *sv;
3014     v = *pv;
3015     if (!PyBytes_Check(v) || newsize < 0) {
3016         goto error;
3017     }
3018     if (Py_SIZE(v) == newsize) {
3019         /* return early if newsize equals to v->ob_size */
3020         return 0;
3021     }
3022     if (Py_SIZE(v) == 0) {
3023         if (newsize == 0) {
3024             return 0;
3025         }
3026         *pv = _PyBytes_FromSize(newsize, 0);
3027         Py_DECREF(v);
3028         return (*pv == NULL) ? -1 : 0;
3029     }
3030     if (Py_REFCNT(v) != 1) {
3031         goto error;
3032     }
3033     if (newsize == 0) {
3034         *pv = bytes_new_empty();
3035         Py_DECREF(v);
3036         return 0;
3037     }
3038     /* XXX UNREF/NEWREF interface should be more symmetrical */
3039 #ifdef Py_REF_DEBUG
3040     _Py_RefTotal--;
3041 #endif
3042 #ifdef Py_TRACE_REFS
3043     _Py_ForgetReference(v);
3044 #endif
3045     *pv = (PyObject *)
3046         PyObject_Realloc(v, PyBytesObject_SIZE + newsize);
3047     if (*pv == NULL) {
3048         PyObject_Free(v);
3049         PyErr_NoMemory();
3050         return -1;
3051     }
3052     _Py_NewReference(*pv);
3053     sv = (PyBytesObject *) *pv;
3054     Py_SET_SIZE(sv, newsize);
3055     sv->ob_sval[newsize] = '\0';
3056     sv->ob_shash = -1;          /* invalidate cached hash value */
3057     return 0;
3058 error:
3059     *pv = 0;
3060     Py_DECREF(v);
3061     PyErr_BadInternalCall();
3062     return -1;
3063 }
3064 
3065 
3066 PyStatus
_PyBytes_Init(PyInterpreterState * interp)3067 _PyBytes_Init(PyInterpreterState *interp)
3068 {
3069     struct _Py_bytes_state *state = &interp->bytes;
3070     if (bytes_create_empty_string_singleton(state) < 0) {
3071         return _PyStatus_NO_MEMORY();
3072     }
3073     return _PyStatus_OK();
3074 }
3075 
3076 
3077 void
_PyBytes_Fini(PyInterpreterState * interp)3078 _PyBytes_Fini(PyInterpreterState *interp)
3079 {
3080     struct _Py_bytes_state* state = &interp->bytes;
3081     for (int i = 0; i < UCHAR_MAX + 1; i++) {
3082         Py_CLEAR(state->characters[i]);
3083     }
3084     Py_CLEAR(state->empty_string);
3085 }
3086 
3087 /*********************** Bytes Iterator ****************************/
3088 
/* State of a bytes iterator (instances of PyBytesIter_Type). */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;   /* Index into it_seq of the next byte to yield */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
3094 
3095 static void
striter_dealloc(striterobject * it)3096 striter_dealloc(striterobject *it)
3097 {
3098     _PyObject_GC_UNTRACK(it);
3099     Py_XDECREF(it->it_seq);
3100     PyObject_GC_Del(it);
3101 }
3102 
3103 static int
striter_traverse(striterobject * it,visitproc visit,void * arg)3104 striter_traverse(striterobject *it, visitproc visit, void *arg)
3105 {
3106     Py_VISIT(it->it_seq);
3107     return 0;
3108 }
3109 
3110 static PyObject *
striter_next(striterobject * it)3111 striter_next(striterobject *it)
3112 {
3113     PyBytesObject *seq;
3114 
3115     assert(it != NULL);
3116     seq = it->it_seq;
3117     if (seq == NULL)
3118         return NULL;
3119     assert(PyBytes_Check(seq));
3120 
3121     if (it->it_index < PyBytes_GET_SIZE(seq)) {
3122         return PyLong_FromLong(
3123             (unsigned char)seq->ob_sval[it->it_index++]);
3124     }
3125 
3126     it->it_seq = NULL;
3127     Py_DECREF(seq);
3128     return NULL;
3129 }
3130 
3131 static PyObject *
striter_len(striterobject * it,PyObject * Py_UNUSED (ignored))3132 striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
3133 {
3134     Py_ssize_t len = 0;
3135     if (it->it_seq)
3136         len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3137     return PyLong_FromSsize_t(len);
3138 }
3139 
3140 PyDoc_STRVAR(length_hint_doc,
3141              "Private method returning an estimate of len(list(it)).");
3142 
3143 static PyObject *
striter_reduce(striterobject * it,PyObject * Py_UNUSED (ignored))3144 striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
3145 {
3146     _Py_IDENTIFIER(iter);
3147     if (it->it_seq != NULL) {
3148         return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
3149                              it->it_seq, it->it_index);
3150     } else {
3151         return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
3152     }
3153 }
3154 
3155 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3156 
3157 static PyObject *
striter_setstate(striterobject * it,PyObject * state)3158 striter_setstate(striterobject *it, PyObject *state)
3159 {
3160     Py_ssize_t index = PyLong_AsSsize_t(state);
3161     if (index == -1 && PyErr_Occurred())
3162         return NULL;
3163     if (it->it_seq != NULL) {
3164         if (index < 0)
3165             index = 0;
3166         else if (index > PyBytes_GET_SIZE(it->it_seq))
3167             index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3168         it->it_index = index;
3169     }
3170     Py_RETURN_NONE;
3171 }
3172 
3173 PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3174 
3175 static PyMethodDef striter_methods[] = {
3176     {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3177      length_hint_doc},
3178     {"__reduce__",      (PyCFunction)striter_reduce, METH_NOARGS,
3179      reduce_doc},
3180     {"__setstate__",    (PyCFunction)striter_setstate, METH_O,
3181      setstate_doc},
3182     {NULL,              NULL}           /* sentinel */
3183 };
3184 
3185 PyTypeObject PyBytesIter_Type = {
3186     PyVarObject_HEAD_INIT(&PyType_Type, 0)
3187     "bytes_iterator",                           /* tp_name */
3188     sizeof(striterobject),                      /* tp_basicsize */
3189     0,                                          /* tp_itemsize */
3190     /* methods */
3191     (destructor)striter_dealloc,                /* tp_dealloc */
3192     0,                                          /* tp_vectorcall_offset */
3193     0,                                          /* tp_getattr */
3194     0,                                          /* tp_setattr */
3195     0,                                          /* tp_as_async */
3196     0,                                          /* tp_repr */
3197     0,                                          /* tp_as_number */
3198     0,                                          /* tp_as_sequence */
3199     0,                                          /* tp_as_mapping */
3200     0,                                          /* tp_hash */
3201     0,                                          /* tp_call */
3202     0,                                          /* tp_str */
3203     PyObject_GenericGetAttr,                    /* tp_getattro */
3204     0,                                          /* tp_setattro */
3205     0,                                          /* tp_as_buffer */
3206     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3207     0,                                          /* tp_doc */
3208     (traverseproc)striter_traverse,     /* tp_traverse */
3209     0,                                          /* tp_clear */
3210     0,                                          /* tp_richcompare */
3211     0,                                          /* tp_weaklistoffset */
3212     PyObject_SelfIter,                          /* tp_iter */
3213     (iternextfunc)striter_next,                 /* tp_iternext */
3214     striter_methods,                            /* tp_methods */
3215     0,
3216 };
3217 
3218 static PyObject *
bytes_iter(PyObject * seq)3219 bytes_iter(PyObject *seq)
3220 {
3221     striterobject *it;
3222 
3223     if (!PyBytes_Check(seq)) {
3224         PyErr_BadInternalCall();
3225         return NULL;
3226     }
3227     it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3228     if (it == NULL)
3229         return NULL;
3230     it->it_index = 0;
3231     Py_INCREF(seq);
3232     it->it_seq = (PyBytesObject *)seq;
3233     _PyObject_GC_TRACK(it);
3234     return (PyObject *)it;
3235 }
3236 
3237 
3238 /* _PyBytesWriter API */
3239 
/* OVERALLOCATE_FACTOR is a divisor: when overallocation is enabled, a
   resize requests size + size / OVERALLOCATE_FACTOR bytes. */
#ifndef MS_WINDOWS
   /* On Linux, overallocate by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#else
   /* On Windows, overallocate by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#endif
3247 
3248 void
_PyBytesWriter_Init(_PyBytesWriter * writer)3249 _PyBytesWriter_Init(_PyBytesWriter *writer)
3250 {
3251     /* Set all attributes before small_buffer to 0 */
3252     memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
3253 #ifndef NDEBUG
3254     memset(writer->small_buffer, PYMEM_CLEANBYTE,
3255            sizeof(writer->small_buffer));
3256 #endif
3257 }
3258 
3259 void
_PyBytesWriter_Dealloc(_PyBytesWriter * writer)3260 _PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3261 {
3262     Py_CLEAR(writer->buffer);
3263 }
3264 
3265 Py_LOCAL_INLINE(char*)
_PyBytesWriter_AsString(_PyBytesWriter * writer)3266 _PyBytesWriter_AsString(_PyBytesWriter *writer)
3267 {
3268     if (writer->use_small_buffer) {
3269         assert(writer->buffer == NULL);
3270         return writer->small_buffer;
3271     }
3272     else if (writer->use_bytearray) {
3273         assert(writer->buffer != NULL);
3274         return PyByteArray_AS_STRING(writer->buffer);
3275     }
3276     else {
3277         assert(writer->buffer != NULL);
3278         return PyBytes_AS_STRING(writer->buffer);
3279     }
3280 }
3281 
3282 Py_LOCAL_INLINE(Py_ssize_t)
_PyBytesWriter_GetSize(_PyBytesWriter * writer,char * str)3283 _PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
3284 {
3285     const char *start = _PyBytesWriter_AsString(writer);
3286     assert(str != NULL);
3287     assert(str >= start);
3288     assert(str - start <= writer->allocated);
3289     return str - start;
3290 }
3291 
3292 #ifndef NDEBUG
3293 Py_LOCAL_INLINE(int)
_PyBytesWriter_CheckConsistency(_PyBytesWriter * writer,char * str)3294 _PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3295 {
3296     const char *start, *end;
3297 
3298     if (writer->use_small_buffer) {
3299         assert(writer->buffer == NULL);
3300     }
3301     else {
3302         assert(writer->buffer != NULL);
3303         if (writer->use_bytearray)
3304             assert(PyByteArray_CheckExact(writer->buffer));
3305         else
3306             assert(PyBytes_CheckExact(writer->buffer));
3307         assert(Py_REFCNT(writer->buffer) == 1);
3308     }
3309 
3310     if (writer->use_bytearray) {
3311         /* bytearray has its own overallocation algorithm,
3312            writer overallocation must be disabled */
3313         assert(!writer->overallocate);
3314     }
3315 
3316     assert(0 <= writer->allocated);
3317     assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
3318     /* the last byte must always be null */
3319     start = _PyBytesWriter_AsString(writer);
3320     assert(start[writer->allocated] == 0);
3321 
3322     end = start + writer->allocated;
3323     assert(str != NULL);
3324     assert(start <= str && str <= end);
3325     return 1;
3326 }
3327 #endif
3328 
3329 void*
_PyBytesWriter_Resize(_PyBytesWriter * writer,void * str,Py_ssize_t size)3330 _PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3331 {
3332     Py_ssize_t allocated, pos;
3333 
3334     assert(_PyBytesWriter_CheckConsistency(writer, str));
3335     assert(writer->allocated < size);
3336 
3337     allocated = size;
3338     if (writer->overallocate
3339         && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3340         /* overallocate to limit the number of realloc() */
3341         allocated += allocated / OVERALLOCATE_FACTOR;
3342     }
3343 
3344     pos = _PyBytesWriter_GetSize(writer, str);
3345     if (!writer->use_small_buffer) {
3346         if (writer->use_bytearray) {
3347             if (PyByteArray_Resize(writer->buffer, allocated))
3348                 goto error;
3349             /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3350                but we cannot use ob_alloc because bytes may need to be moved
3351                to use the whole buffer. bytearray uses an internal optimization
3352                to avoid moving or copying bytes when bytes are removed at the
3353                beginning (ex: del bytearray[:1]). */
3354         }
3355         else {
3356             if (_PyBytes_Resize(&writer->buffer, allocated))
3357                 goto error;
3358         }
3359     }
3360     else {
3361         /* convert from stack buffer to bytes object buffer */
3362         assert(writer->buffer == NULL);
3363 
3364         if (writer->use_bytearray)
3365             writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3366         else
3367             writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3368         if (writer->buffer == NULL)
3369             goto error;
3370 
3371         if (pos != 0) {
3372             char *dest;
3373             if (writer->use_bytearray)
3374                 dest = PyByteArray_AS_STRING(writer->buffer);
3375             else
3376                 dest = PyBytes_AS_STRING(writer->buffer);
3377             memcpy(dest,
3378                       writer->small_buffer,
3379                       pos);
3380         }
3381 
3382         writer->use_small_buffer = 0;
3383 #ifndef NDEBUG
3384         memset(writer->small_buffer, PYMEM_CLEANBYTE,
3385                sizeof(writer->small_buffer));
3386 #endif
3387     }
3388     writer->allocated = allocated;
3389 
3390     str = _PyBytesWriter_AsString(writer) + pos;
3391     assert(_PyBytesWriter_CheckConsistency(writer, str));
3392     return str;
3393 
3394 error:
3395     _PyBytesWriter_Dealloc(writer);
3396     return NULL;
3397 }
3398 
3399 void*
_PyBytesWriter_Prepare(_PyBytesWriter * writer,void * str,Py_ssize_t size)3400 _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3401 {
3402     Py_ssize_t new_min_size;
3403 
3404     assert(_PyBytesWriter_CheckConsistency(writer, str));
3405     assert(size >= 0);
3406 
3407     if (size == 0) {
3408         /* nothing to do */
3409         return str;
3410     }
3411 
3412     if (writer->min_size > PY_SSIZE_T_MAX - size) {
3413         PyErr_NoMemory();
3414         _PyBytesWriter_Dealloc(writer);
3415         return NULL;
3416     }
3417     new_min_size = writer->min_size + size;
3418 
3419     if (new_min_size > writer->allocated)
3420         str = _PyBytesWriter_Resize(writer, str, new_min_size);
3421 
3422     writer->min_size = new_min_size;
3423     return str;
3424 }
3425 
3426 /* Allocate the buffer to write size bytes.
3427    Return the pointer to the beginning of buffer data.
3428    Raise an exception and return NULL on error. */
3429 void*
_PyBytesWriter_Alloc(_PyBytesWriter * writer,Py_ssize_t size)3430 _PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3431 {
3432     /* ensure that _PyBytesWriter_Alloc() is only called once */
3433     assert(writer->min_size == 0 && writer->buffer == NULL);
3434     assert(size >= 0);
3435 
3436     writer->use_small_buffer = 1;
3437 #ifndef NDEBUG
3438     writer->allocated = sizeof(writer->small_buffer) - 1;
3439     /* In debug mode, don't use the full small buffer because it is less
3440        efficient than bytes and bytearray objects to detect buffer underflow
3441        and buffer overflow. Use 10 bytes of the small buffer to test also
3442        code using the smaller buffer in debug mode.
3443 
3444        Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3445        in debug mode to also be able to detect stack overflow when running
3446        tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3447        if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3448        stack overflow. */
3449     writer->allocated = Py_MIN(writer->allocated, 10);
3450     /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3451        to detect buffer overflow */
3452     writer->small_buffer[writer->allocated] = 0;
3453 #else
3454     writer->allocated = sizeof(writer->small_buffer);
3455 #endif
3456     return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
3457 }
3458 
3459 PyObject *
_PyBytesWriter_Finish(_PyBytesWriter * writer,void * str)3460 _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
3461 {
3462     Py_ssize_t size;
3463     PyObject *result;
3464 
3465     assert(_PyBytesWriter_CheckConsistency(writer, str));
3466 
3467     size = _PyBytesWriter_GetSize(writer, str);
3468     if (size == 0 && !writer->use_bytearray) {
3469         Py_CLEAR(writer->buffer);
3470         /* Get the empty byte string singleton */
3471         result = PyBytes_FromStringAndSize(NULL, 0);
3472     }
3473     else if (writer->use_small_buffer) {
3474         if (writer->use_bytearray) {
3475             result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3476         }
3477         else {
3478             result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3479         }
3480     }
3481     else {
3482         result = writer->buffer;
3483         writer->buffer = NULL;
3484 
3485         if (size != writer->allocated) {
3486             if (writer->use_bytearray) {
3487                 if (PyByteArray_Resize(result, size)) {
3488                     Py_DECREF(result);
3489                     return NULL;
3490                 }
3491             }
3492             else {
3493                 if (_PyBytes_Resize(&result, size)) {
3494                     assert(result == NULL);
3495                     return NULL;
3496                 }
3497             }
3498         }
3499     }
3500     return result;
3501 }
3502 
3503 void*
_PyBytesWriter_WriteBytes(_PyBytesWriter * writer,void * ptr,const void * bytes,Py_ssize_t size)3504 _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
3505                           const void *bytes, Py_ssize_t size)
3506 {
3507     char *str = (char *)ptr;
3508 
3509     str = _PyBytesWriter_Prepare(writer, str, size);
3510     if (str == NULL)
3511         return NULL;
3512 
3513     memcpy(str, bytes, size);
3514     str += size;
3515 
3516     return str;
3517 }
3518