• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* bytes object implementation */
2 
3 #define PY_SSIZE_T_CLEAN
4 
5 #include "Python.h"
6 #include "pycore_abstract.h"      // _PyIndex_Check()
7 #include "pycore_bytesobject.h"   // _PyBytes_Find(), _PyBytes_Repeat()
8 #include "pycore_bytes_methods.h" // _Py_bytes_startswith()
9 #include "pycore_call.h"          // _PyObject_CallNoArgs()
10 #include "pycore_format.h"        // F_LJUST
11 #include "pycore_global_objects.h"  // _Py_GET_GLOBAL_OBJECT()
12 #include "pycore_initconfig.h"    // _PyStatus_OK()
13 #include "pycore_long.h"          // _PyLong_DigitValue
14 #include "pycore_object.h"        // _PyObject_GC_TRACK
15 #include "pycore_pymem.h"         // PYMEM_CLEANBYTE
16 #include "pycore_strhex.h"        // _Py_strhex_with_sep()
17 
18 #include <stddef.h>
19 
20 /*[clinic input]
21 class bytes "PyBytesObject *" "&PyBytes_Type"
22 [clinic start generated code]*/
23 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
24 
25 #include "clinic/bytesobject.c.h"
26 
27 /* PyBytesObject_SIZE gives the basic size of a bytes object; any memory allocation
28    for a bytes object of length n should request PyBytesObject_SIZE + n bytes.
29 
30    Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
31    3 or 7 bytes per bytes object allocation on a typical system.
32 */
33 #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
34 
35 /* Forward declaration */
36 Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
37                                                    char *str);
38 
39 
/* Shorthands for the bytes singletons used by the constructors below. */
#define CHARACTERS _Py_SINGLETON(bytes_characters)
/* Pointer to the singleton for the one-byte string whose byte value is `ch`
   (callers index it with a value masked to 0..255).  This expands to an
   expression, so it deliberately carries no trailing semicolon. */
#define CHARACTER(ch) \
     ((PyBytesObject *)&(CHARACTERS[ch]))
/* Pointer to the empty bytes singleton. */
#define EMPTY (&_Py_SINGLETON(bytes_empty))
44 
45 
46 // Return a borrowed reference to the empty bytes string singleton.
bytes_get_empty(void)47 static inline PyObject* bytes_get_empty(void)
48 {
49     return &EMPTY->ob_base.ob_base;
50 }
51 
52 
53 // Return a strong reference to the empty bytes string singleton.
bytes_new_empty(void)54 static inline PyObject* bytes_new_empty(void)
55 {
56     Py_INCREF(EMPTY);
57     return (PyObject *)EMPTY;
58 }
59 
60 
61 /*
62    For PyBytes_FromString(), the parameter `str' points to a null-terminated
63    string containing exactly `size' bytes.
64 
65    For PyBytes_FromStringAndSize(), the parameter `str' is
66    either NULL or else points to a string containing at least `size' bytes.
67    For PyBytes_FromStringAndSize(), the string in the `str' parameter does
68    not have to be null-terminated.  (Therefore it is safe to construct a
69    substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
70    If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
71    bytes (setting the last byte to the null terminating character) and you can
72    fill in the data yourself.  If `str' is non-NULL then the resulting
73    PyBytes object must be treated as immutable and you must not fill in nor
74    alter the data yourself, since the strings may be shared.
75 
76    The PyObject member `op->ob_size', which denotes the number of "extra
77    items" in a variable-size object, will contain the number of bytes
78    allocated for string data, not counting the null terminating character.
79    It is therefore equal to the `size' parameter (for
80    PyBytes_FromStringAndSize()) or the length of the string in the `str'
81    parameter (for PyBytes_FromString()).
82 */
83 static PyObject *
_PyBytes_FromSize(Py_ssize_t size,int use_calloc)84 _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
85 {
86     PyBytesObject *op;
87     assert(size >= 0);
88 
89     if (size == 0) {
90         return bytes_new_empty();
91     }
92 
93     if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
94         PyErr_SetString(PyExc_OverflowError,
95                         "byte string is too large");
96         return NULL;
97     }
98 
99     /* Inline PyObject_NewVar */
100     if (use_calloc)
101         op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
102     else
103         op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
104     if (op == NULL) {
105         return PyErr_NoMemory();
106     }
107     _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
108 _Py_COMP_DIAG_PUSH
109 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
110     op->ob_shash = -1;
111 _Py_COMP_DIAG_POP
112     if (!use_calloc) {
113         op->ob_sval[size] = '\0';
114     }
115     return (PyObject *) op;
116 }
117 
118 PyObject *
PyBytes_FromStringAndSize(const char * str,Py_ssize_t size)119 PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
120 {
121     PyBytesObject *op;
122     if (size < 0) {
123         PyErr_SetString(PyExc_SystemError,
124             "Negative size passed to PyBytes_FromStringAndSize");
125         return NULL;
126     }
127     if (size == 1 && str != NULL) {
128         op = CHARACTER(*str & 255);
129         Py_INCREF(op);
130         return (PyObject *)op;
131     }
132     if (size == 0) {
133         return bytes_new_empty();
134     }
135 
136     op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
137     if (op == NULL)
138         return NULL;
139     if (str == NULL)
140         return (PyObject *) op;
141 
142     memcpy(op->ob_sval, str, size);
143     return (PyObject *) op;
144 }
145 
146 PyObject *
PyBytes_FromString(const char * str)147 PyBytes_FromString(const char *str)
148 {
149     size_t size;
150     PyBytesObject *op;
151 
152     assert(str != NULL);
153     size = strlen(str);
154     if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
155         PyErr_SetString(PyExc_OverflowError,
156             "byte string is too long");
157         return NULL;
158     }
159 
160     if (size == 0) {
161         return bytes_new_empty();
162     }
163     else if (size == 1) {
164         op = CHARACTER(*str & 255);
165         Py_INCREF(op);
166         return (PyObject *)op;
167     }
168 
169     /* Inline PyObject_NewVar */
170     op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
171     if (op == NULL) {
172         return PyErr_NoMemory();
173     }
174     _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
175 _Py_COMP_DIAG_PUSH
176 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
177     op->ob_shash = -1;
178 _Py_COMP_DIAG_POP
179     memcpy(op->ob_sval, str, size+1);
180     return (PyObject *) op;
181 }
182 
183 PyObject *
PyBytes_FromFormatV(const char * format,va_list vargs)184 PyBytes_FromFormatV(const char *format, va_list vargs)
185 {
186     char *s;
187     const char *f;
188     const char *p;
189     Py_ssize_t prec;
190     int longflag;
191     int size_tflag;
192     /* Longest 64-bit formatted numbers:
193        - "18446744073709551615\0" (21 bytes)
194        - "-9223372036854775808\0" (21 bytes)
195        Decimal takes the most space (it isn't enough for octal.)
196 
197        Longest 64-bit pointer representation:
198        "0xffffffffffffffff\0" (19 bytes). */
199     char buffer[21];
200     _PyBytesWriter writer;
201 
202     _PyBytesWriter_Init(&writer);
203 
204     s = _PyBytesWriter_Alloc(&writer, strlen(format));
205     if (s == NULL)
206         return NULL;
207     writer.overallocate = 1;
208 
209 #define WRITE_BYTES(str) \
210     do { \
211         s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
212         if (s == NULL) \
213             goto error; \
214     } while (0)
215 
216     for (f = format; *f; f++) {
217         if (*f != '%') {
218             *s++ = *f;
219             continue;
220         }
221 
222         p = f++;
223 
224         /* ignore the width (ex: 10 in "%10s") */
225         while (Py_ISDIGIT(*f))
226             f++;
227 
228         /* parse the precision (ex: 10 in "%.10s") */
229         prec = 0;
230         if (*f == '.') {
231             f++;
232             for (; Py_ISDIGIT(*f); f++) {
233                 prec = (prec * 10) + (*f - '0');
234             }
235         }
236 
237         while (*f && *f != '%' && !Py_ISALPHA(*f))
238             f++;
239 
240         /* handle the long flag ('l'), but only for %ld and %lu.
241            others can be added when necessary. */
242         longflag = 0;
243         if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
244             longflag = 1;
245             ++f;
246         }
247 
248         /* handle the size_t flag ('z'). */
249         size_tflag = 0;
250         if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
251             size_tflag = 1;
252             ++f;
253         }
254 
255         /* subtract bytes preallocated for the format string
256            (ex: 2 for "%s") */
257         writer.min_size -= (f - p + 1);
258 
259         switch (*f) {
260         case 'c':
261         {
262             int c = va_arg(vargs, int);
263             if (c < 0 || c > 255) {
264                 PyErr_SetString(PyExc_OverflowError,
265                                 "PyBytes_FromFormatV(): %c format "
266                                 "expects an integer in range [0; 255]");
267                 goto error;
268             }
269             writer.min_size++;
270             *s++ = (unsigned char)c;
271             break;
272         }
273 
274         case 'd':
275             if (longflag) {
276                 sprintf(buffer, "%ld", va_arg(vargs, long));
277             }
278             else if (size_tflag) {
279                 sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
280             }
281             else {
282                 sprintf(buffer, "%d", va_arg(vargs, int));
283             }
284             assert(strlen(buffer) < sizeof(buffer));
285             WRITE_BYTES(buffer);
286             break;
287 
288         case 'u':
289             if (longflag) {
290                 sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
291             }
292             else if (size_tflag) {
293                 sprintf(buffer, "%zu", va_arg(vargs, size_t));
294             }
295             else {
296                 sprintf(buffer, "%u", va_arg(vargs, unsigned int));
297             }
298             assert(strlen(buffer) < sizeof(buffer));
299             WRITE_BYTES(buffer);
300             break;
301 
302         case 'i':
303             sprintf(buffer, "%i", va_arg(vargs, int));
304             assert(strlen(buffer) < sizeof(buffer));
305             WRITE_BYTES(buffer);
306             break;
307 
308         case 'x':
309             sprintf(buffer, "%x", va_arg(vargs, int));
310             assert(strlen(buffer) < sizeof(buffer));
311             WRITE_BYTES(buffer);
312             break;
313 
314         case 's':
315         {
316             Py_ssize_t i;
317 
318             p = va_arg(vargs, const char*);
319             if (prec <= 0) {
320                 i = strlen(p);
321             }
322             else {
323                 i = 0;
324                 while (i < prec && p[i]) {
325                     i++;
326                 }
327             }
328             s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
329             if (s == NULL)
330                 goto error;
331             break;
332         }
333 
334         case 'p':
335             sprintf(buffer, "%p", va_arg(vargs, void*));
336             assert(strlen(buffer) < sizeof(buffer));
337             /* %p is ill-defined:  ensure leading 0x. */
338             if (buffer[1] == 'X')
339                 buffer[1] = 'x';
340             else if (buffer[1] != 'x') {
341                 memmove(buffer+2, buffer, strlen(buffer)+1);
342                 buffer[0] = '0';
343                 buffer[1] = 'x';
344             }
345             WRITE_BYTES(buffer);
346             break;
347 
348         case '%':
349             writer.min_size++;
350             *s++ = '%';
351             break;
352 
353         default:
354             if (*f == 0) {
355                 /* fix min_size if we reached the end of the format string */
356                 writer.min_size++;
357             }
358 
359             /* invalid format string: copy unformatted string and exit */
360             WRITE_BYTES(p);
361             return _PyBytesWriter_Finish(&writer, s);
362         }
363     }
364 
365 #undef WRITE_BYTES
366 
367     return _PyBytesWriter_Finish(&writer, s);
368 
369  error:
370     _PyBytesWriter_Dealloc(&writer);
371     return NULL;
372 }
373 
374 PyObject *
PyBytes_FromFormat(const char * format,...)375 PyBytes_FromFormat(const char *format, ...)
376 {
377     PyObject* ret;
378     va_list vargs;
379 
380 #ifdef HAVE_STDARG_PROTOTYPES
381     va_start(vargs, format);
382 #else
383     va_start(vargs);
384 #endif
385     ret = PyBytes_FromFormatV(format, vargs);
386     va_end(vargs);
387     return ret;
388 }
389 
390 /* Helpers for formatstring */
391 
392 Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject * args,Py_ssize_t arglen,Py_ssize_t * p_argidx)393 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
394 {
395     Py_ssize_t argidx = *p_argidx;
396     if (argidx < arglen) {
397         (*p_argidx)++;
398         if (arglen < 0)
399             return args;
400         else
401             return PyTuple_GetItem(args, argidx);
402     }
403     PyErr_SetString(PyExc_TypeError,
404                     "not enough arguments for format string");
405     return NULL;
406 }
407 
408 /* Returns a new reference to a PyBytes object, or NULL on failure. */
409 
410 static char*
formatfloat(PyObject * v,int flags,int prec,int type,PyObject ** p_result,_PyBytesWriter * writer,char * str)411 formatfloat(PyObject *v, int flags, int prec, int type,
412             PyObject **p_result, _PyBytesWriter *writer, char *str)
413 {
414     char *p;
415     PyObject *result;
416     double x;
417     size_t len;
418     int dtoa_flags = 0;
419 
420     x = PyFloat_AsDouble(v);
421     if (x == -1.0 && PyErr_Occurred()) {
422         PyErr_Format(PyExc_TypeError, "float argument required, "
423                      "not %.200s", Py_TYPE(v)->tp_name);
424         return NULL;
425     }
426 
427     if (prec < 0)
428         prec = 6;
429 
430     if (flags & F_ALT) {
431         dtoa_flags |= Py_DTSF_ALT;
432     }
433     p = PyOS_double_to_string(x, type, prec, dtoa_flags, NULL);
434 
435     if (p == NULL)
436         return NULL;
437 
438     len = strlen(p);
439     if (writer != NULL) {
440         str = _PyBytesWriter_Prepare(writer, str, len);
441         if (str == NULL) {
442             PyMem_Free(p);
443             return NULL;
444         }
445         memcpy(str, p, len);
446         PyMem_Free(p);
447         str += len;
448         return str;
449     }
450 
451     result = PyBytes_FromStringAndSize(p, len);
452     PyMem_Free(p);
453     *p_result = result;
454     return result != NULL ? str : NULL;
455 }
456 
457 static PyObject *
formatlong(PyObject * v,int flags,int prec,int type)458 formatlong(PyObject *v, int flags, int prec, int type)
459 {
460     PyObject *result, *iobj;
461     if (type == 'i')
462         type = 'd';
463     if (PyLong_Check(v))
464         return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
465     if (PyNumber_Check(v)) {
466         /* make sure number is a type of integer for o, x, and X */
467         if (type == 'o' || type == 'x' || type == 'X')
468             iobj = _PyNumber_Index(v);
469         else
470             iobj = PyNumber_Long(v);
471         if (iobj != NULL) {
472             assert(PyLong_Check(iobj));
473             result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
474             Py_DECREF(iobj);
475             return result;
476         }
477         if (!PyErr_ExceptionMatches(PyExc_TypeError))
478             return NULL;
479     }
480     PyErr_Format(PyExc_TypeError,
481         "%%%c format: %s is required, not %.200s", type,
482         (type == 'o' || type == 'x' || type == 'X') ? "an integer"
483                                                     : "a real number",
484         Py_TYPE(v)->tp_name);
485     return NULL;
486 }
487 
488 static int
byte_converter(PyObject * arg,char * p)489 byte_converter(PyObject *arg, char *p)
490 {
491     if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
492         *p = PyBytes_AS_STRING(arg)[0];
493         return 1;
494     }
495     else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
496         *p = PyByteArray_AS_STRING(arg)[0];
497         return 1;
498     }
499     else {
500         int overflow;
501         long ival = PyLong_AsLongAndOverflow(arg, &overflow);
502         if (ival == -1 && PyErr_Occurred()) {
503             if (PyErr_ExceptionMatches(PyExc_TypeError)) {
504                 goto onError;
505             }
506             return 0;
507         }
508         if (!(0 <= ival && ival <= 255)) {
509             /* this includes an overflow in converting to C long */
510             PyErr_SetString(PyExc_OverflowError,
511                             "%c arg not in range(256)");
512             return 0;
513         }
514         *p = (char)ival;
515         return 1;
516     }
517   onError:
518     PyErr_SetString(PyExc_TypeError,
519         "%c requires an integer in range(256) or a single byte");
520     return 0;
521 }
522 
523 static PyObject *_PyBytes_FromBuffer(PyObject *x);
524 
525 static PyObject *
format_obj(PyObject * v,const char ** pbuf,Py_ssize_t * plen)526 format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
527 {
528     PyObject *func, *result;
529     /* is it a bytes object? */
530     if (PyBytes_Check(v)) {
531         *pbuf = PyBytes_AS_STRING(v);
532         *plen = PyBytes_GET_SIZE(v);
533         Py_INCREF(v);
534         return v;
535     }
536     if (PyByteArray_Check(v)) {
537         *pbuf = PyByteArray_AS_STRING(v);
538         *plen = PyByteArray_GET_SIZE(v);
539         Py_INCREF(v);
540         return v;
541     }
542     /* does it support __bytes__? */
543     func = _PyObject_LookupSpecial(v, &_Py_ID(__bytes__));
544     if (func != NULL) {
545         result = _PyObject_CallNoArgs(func);
546         Py_DECREF(func);
547         if (result == NULL)
548             return NULL;
549         if (!PyBytes_Check(result)) {
550             PyErr_Format(PyExc_TypeError,
551                          "__bytes__ returned non-bytes (type %.200s)",
552                          Py_TYPE(result)->tp_name);
553             Py_DECREF(result);
554             return NULL;
555         }
556         *pbuf = PyBytes_AS_STRING(result);
557         *plen = PyBytes_GET_SIZE(result);
558         return result;
559     }
560     /* does it support buffer protocol? */
561     if (PyObject_CheckBuffer(v)) {
562         /* maybe we can avoid making a copy of the buffer object here? */
563         result = _PyBytes_FromBuffer(v);
564         if (result == NULL)
565             return NULL;
566         *pbuf = PyBytes_AS_STRING(result);
567         *plen = PyBytes_GET_SIZE(result);
568         return result;
569     }
570     PyErr_Format(PyExc_TypeError,
571                  "%%b requires a bytes-like object, "
572                  "or an object that implements __bytes__, not '%.100s'",
573                  Py_TYPE(v)->tp_name);
574     return NULL;
575 }
576 
577 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
578 
579 PyObject *
_PyBytes_FormatEx(const char * format,Py_ssize_t format_len,PyObject * args,int use_bytearray)580 _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
581                   PyObject *args, int use_bytearray)
582 {
583     const char *fmt;
584     char *res;
585     Py_ssize_t arglen, argidx;
586     Py_ssize_t fmtcnt;
587     int args_owned = 0;
588     PyObject *dict = NULL;
589     _PyBytesWriter writer;
590 
591     if (args == NULL) {
592         PyErr_BadInternalCall();
593         return NULL;
594     }
595     fmt = format;
596     fmtcnt = format_len;
597 
598     _PyBytesWriter_Init(&writer);
599     writer.use_bytearray = use_bytearray;
600 
601     res = _PyBytesWriter_Alloc(&writer, fmtcnt);
602     if (res == NULL)
603         return NULL;
604     if (!use_bytearray)
605         writer.overallocate = 1;
606 
607     if (PyTuple_Check(args)) {
608         arglen = PyTuple_GET_SIZE(args);
609         argidx = 0;
610     }
611     else {
612         arglen = -1;
613         argidx = -2;
614     }
615     if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
616         !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
617         !PyByteArray_Check(args)) {
618             dict = args;
619     }
620 
621     while (--fmtcnt >= 0) {
622         if (*fmt != '%') {
623             Py_ssize_t len;
624             char *pos;
625 
626             pos = (char *)memchr(fmt + 1, '%', fmtcnt);
627             if (pos != NULL)
628                 len = pos - fmt;
629             else
630                 len = fmtcnt + 1;
631             assert(len != 0);
632 
633             memcpy(res, fmt, len);
634             res += len;
635             fmt += len;
636             fmtcnt -= (len - 1);
637         }
638         else {
639             /* Got a format specifier */
640             int flags = 0;
641             Py_ssize_t width = -1;
642             int prec = -1;
643             int c = '\0';
644             int fill;
645             PyObject *v = NULL;
646             PyObject *temp = NULL;
647             const char *pbuf = NULL;
648             int sign;
649             Py_ssize_t len = 0;
650             char onechar; /* For byte_converter() */
651             Py_ssize_t alloc;
652 
653             fmt++;
654             if (*fmt == '%') {
655                 *res++ = '%';
656                 fmt++;
657                 fmtcnt--;
658                 continue;
659             }
660             if (*fmt == '(') {
661                 const char *keystart;
662                 Py_ssize_t keylen;
663                 PyObject *key;
664                 int pcount = 1;
665 
666                 if (dict == NULL) {
667                     PyErr_SetString(PyExc_TypeError,
668                              "format requires a mapping");
669                     goto error;
670                 }
671                 ++fmt;
672                 --fmtcnt;
673                 keystart = fmt;
674                 /* Skip over balanced parentheses */
675                 while (pcount > 0 && --fmtcnt >= 0) {
676                     if (*fmt == ')')
677                         --pcount;
678                     else if (*fmt == '(')
679                         ++pcount;
680                     fmt++;
681                 }
682                 keylen = fmt - keystart - 1;
683                 if (fmtcnt < 0 || pcount > 0) {
684                     PyErr_SetString(PyExc_ValueError,
685                                "incomplete format key");
686                     goto error;
687                 }
688                 key = PyBytes_FromStringAndSize(keystart,
689                                                  keylen);
690                 if (key == NULL)
691                     goto error;
692                 if (args_owned) {
693                     Py_DECREF(args);
694                     args_owned = 0;
695                 }
696                 args = PyObject_GetItem(dict, key);
697                 Py_DECREF(key);
698                 if (args == NULL) {
699                     goto error;
700                 }
701                 args_owned = 1;
702                 arglen = -1;
703                 argidx = -2;
704             }
705 
706             /* Parse flags. Example: "%+i" => flags=F_SIGN. */
707             while (--fmtcnt >= 0) {
708                 switch (c = *fmt++) {
709                 case '-': flags |= F_LJUST; continue;
710                 case '+': flags |= F_SIGN; continue;
711                 case ' ': flags |= F_BLANK; continue;
712                 case '#': flags |= F_ALT; continue;
713                 case '0': flags |= F_ZERO; continue;
714                 }
715                 break;
716             }
717 
718             /* Parse width. Example: "%10s" => width=10 */
719             if (c == '*') {
720                 v = getnextarg(args, arglen, &argidx);
721                 if (v == NULL)
722                     goto error;
723                 if (!PyLong_Check(v)) {
724                     PyErr_SetString(PyExc_TypeError,
725                                     "* wants int");
726                     goto error;
727                 }
728                 width = PyLong_AsSsize_t(v);
729                 if (width == -1 && PyErr_Occurred())
730                     goto error;
731                 if (width < 0) {
732                     flags |= F_LJUST;
733                     width = -width;
734                 }
735                 if (--fmtcnt >= 0)
736                     c = *fmt++;
737             }
738             else if (c >= 0 && isdigit(c)) {
739                 width = c - '0';
740                 while (--fmtcnt >= 0) {
741                     c = Py_CHARMASK(*fmt++);
742                     if (!isdigit(c))
743                         break;
744                     if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
745                         PyErr_SetString(
746                             PyExc_ValueError,
747                             "width too big");
748                         goto error;
749                     }
750                     width = width*10 + (c - '0');
751                 }
752             }
753 
754             /* Parse precision. Example: "%.3f" => prec=3 */
755             if (c == '.') {
756                 prec = 0;
757                 if (--fmtcnt >= 0)
758                     c = *fmt++;
759                 if (c == '*') {
760                     v = getnextarg(args, arglen, &argidx);
761                     if (v == NULL)
762                         goto error;
763                     if (!PyLong_Check(v)) {
764                         PyErr_SetString(
765                             PyExc_TypeError,
766                             "* wants int");
767                         goto error;
768                     }
769                     prec = _PyLong_AsInt(v);
770                     if (prec == -1 && PyErr_Occurred())
771                         goto error;
772                     if (prec < 0)
773                         prec = 0;
774                     if (--fmtcnt >= 0)
775                         c = *fmt++;
776                 }
777                 else if (c >= 0 && isdigit(c)) {
778                     prec = c - '0';
779                     while (--fmtcnt >= 0) {
780                         c = Py_CHARMASK(*fmt++);
781                         if (!isdigit(c))
782                             break;
783                         if (prec > (INT_MAX - ((int)c - '0')) / 10) {
784                             PyErr_SetString(
785                                 PyExc_ValueError,
786                                 "prec too big");
787                             goto error;
788                         }
789                         prec = prec*10 + (c - '0');
790                     }
791                 }
792             } /* prec */
793             if (fmtcnt >= 0) {
794                 if (c == 'h' || c == 'l' || c == 'L') {
795                     if (--fmtcnt >= 0)
796                         c = *fmt++;
797                 }
798             }
799             if (fmtcnt < 0) {
800                 PyErr_SetString(PyExc_ValueError,
801                                 "incomplete format");
802                 goto error;
803             }
804             v = getnextarg(args, arglen, &argidx);
805             if (v == NULL)
806                 goto error;
807 
808             if (fmtcnt == 0) {
809                 /* last write: disable writer overallocation */
810                 writer.overallocate = 0;
811             }
812 
813             sign = 0;
814             fill = ' ';
815             switch (c) {
816             case 'r':
817                 // %r is only for 2/3 code; 3 only code should use %a
818             case 'a':
819                 temp = PyObject_ASCII(v);
820                 if (temp == NULL)
821                     goto error;
822                 assert(PyUnicode_IS_ASCII(temp));
823                 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
824                 len = PyUnicode_GET_LENGTH(temp);
825                 if (prec >= 0 && len > prec)
826                     len = prec;
827                 break;
828 
829             case 's':
830                 // %s is only for 2/3 code; 3 only code should use %b
831             case 'b':
832                 temp = format_obj(v, &pbuf, &len);
833                 if (temp == NULL)
834                     goto error;
835                 if (prec >= 0 && len > prec)
836                     len = prec;
837                 break;
838 
839             case 'i':
840             case 'd':
841             case 'u':
842             case 'o':
843             case 'x':
844             case 'X':
845                 if (PyLong_CheckExact(v)
846                     && width == -1 && prec == -1
847                     && !(flags & (F_SIGN | F_BLANK))
848                     && c != 'X')
849                 {
850                     /* Fast path */
851                     int alternate = flags & F_ALT;
852                     int base;
853 
854                     switch(c)
855                     {
856                         default:
857                             Py_UNREACHABLE();
858                         case 'd':
859                         case 'i':
860                         case 'u':
861                             base = 10;
862                             break;
863                         case 'o':
864                             base = 8;
865                             break;
866                         case 'x':
867                         case 'X':
868                             base = 16;
869                             break;
870                     }
871 
872                     /* Fast path */
873                     writer.min_size -= 2; /* size preallocated for "%d" */
874                     res = _PyLong_FormatBytesWriter(&writer, res,
875                                                     v, base, alternate);
876                     if (res == NULL)
877                         goto error;
878                     continue;
879                 }
880 
881                 temp = formatlong(v, flags, prec, c);
882                 if (!temp)
883                     goto error;
884                 assert(PyUnicode_IS_ASCII(temp));
885                 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
886                 len = PyUnicode_GET_LENGTH(temp);
887                 sign = 1;
888                 if (flags & F_ZERO)
889                     fill = '0';
890                 break;
891 
892             case 'e':
893             case 'E':
894             case 'f':
895             case 'F':
896             case 'g':
897             case 'G':
898                 if (width == -1 && prec == -1
899                     && !(flags & (F_SIGN | F_BLANK)))
900                 {
901                     /* Fast path */
902                     writer.min_size -= 2; /* size preallocated for "%f" */
903                     res = formatfloat(v, flags, prec, c, NULL, &writer, res);
904                     if (res == NULL)
905                         goto error;
906                     continue;
907                 }
908 
909                 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
910                     goto error;
911                 pbuf = PyBytes_AS_STRING(temp);
912                 len = PyBytes_GET_SIZE(temp);
913                 sign = 1;
914                 if (flags & F_ZERO)
915                     fill = '0';
916                 break;
917 
918             case 'c':
919                 pbuf = &onechar;
920                 len = byte_converter(v, &onechar);
921                 if (!len)
922                     goto error;
923                 if (width == -1) {
924                     /* Fast path */
925                     *res++ = onechar;
926                     continue;
927                 }
928                 break;
929 
930             default:
931                 PyErr_Format(PyExc_ValueError,
932                   "unsupported format character '%c' (0x%x) "
933                   "at index %zd",
934                   c, c,
935                   (Py_ssize_t)(fmt - 1 - format));
936                 goto error;
937             }
938 
939             if (sign) {
940                 if (*pbuf == '-' || *pbuf == '+') {
941                     sign = *pbuf++;
942                     len--;
943                 }
944                 else if (flags & F_SIGN)
945                     sign = '+';
946                 else if (flags & F_BLANK)
947                     sign = ' ';
948                 else
949                     sign = 0;
950             }
951             if (width < len)
952                 width = len;
953 
954             alloc = width;
955             if (sign != 0 && len == width)
956                 alloc++;
957             /* 2: size preallocated for %s */
958             if (alloc > 2) {
959                 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
960                 if (res == NULL)
961                     goto error;
962             }
963 #ifndef NDEBUG
964             char *before = res;
965 #endif
966 
967             /* Write the sign if needed */
968             if (sign) {
969                 if (fill != ' ')
970                     *res++ = sign;
971                 if (width > len)
972                     width--;
973             }
974 
975             /* Write the numeric prefix for "x", "X" and "o" formats
976                if the alternate form is used.
977                For example, write "0x" for the "%#x" format. */
978             if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
979                 assert(pbuf[0] == '0');
980                 assert(pbuf[1] == c);
981                 if (fill != ' ') {
982                     *res++ = *pbuf++;
983                     *res++ = *pbuf++;
984                 }
985                 width -= 2;
986                 if (width < 0)
987                     width = 0;
988                 len -= 2;
989             }
990 
991             /* Pad left with the fill character if needed */
992             if (width > len && !(flags & F_LJUST)) {
993                 memset(res, fill, width - len);
994                 res += (width - len);
995                 width = len;
996             }
997 
998             /* If padding with spaces: write sign if needed and/or numeric
999                prefix if the alternate form is used */
1000             if (fill == ' ') {
1001                 if (sign)
1002                     *res++ = sign;
1003                 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1004                     assert(pbuf[0] == '0');
1005                     assert(pbuf[1] == c);
1006                     *res++ = *pbuf++;
1007                     *res++ = *pbuf++;
1008                 }
1009             }
1010 
1011             /* Copy bytes */
1012             memcpy(res, pbuf, len);
1013             res += len;
1014 
1015             /* Pad right with the fill character if needed */
1016             if (width > len) {
1017                 memset(res, ' ', width - len);
1018                 res += (width - len);
1019             }
1020 
1021             if (dict && (argidx < arglen)) {
1022                 PyErr_SetString(PyExc_TypeError,
1023                            "not all arguments converted during bytes formatting");
1024                 Py_XDECREF(temp);
1025                 goto error;
1026             }
1027             Py_XDECREF(temp);
1028 
1029 #ifndef NDEBUG
1030             /* check that we computed the exact size for this write */
1031             assert((res - before) == alloc);
1032 #endif
1033         } /* '%' */
1034 
1035         /* If overallocation was disabled, ensure that it was the last
1036            write. Otherwise, we missed an optimization */
1037         assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
1038     } /* until end */
1039 
1040     if (argidx < arglen && !dict) {
1041         PyErr_SetString(PyExc_TypeError,
1042                         "not all arguments converted during bytes formatting");
1043         goto error;
1044     }
1045 
1046     if (args_owned) {
1047         Py_DECREF(args);
1048     }
1049     return _PyBytesWriter_Finish(&writer, res);
1050 
1051  error:
1052     _PyBytesWriter_Dealloc(&writer);
1053     if (args_owned) {
1054         Py_DECREF(args);
1055     }
1056     return NULL;
1057 }
1058 
1059 /* Unescape a backslash-escaped string. */
_PyBytes_DecodeEscape2(const char * s,Py_ssize_t len,const char * errors,int * first_invalid_escape_char,const char ** first_invalid_escape_ptr)1060 PyObject *_PyBytes_DecodeEscape2(const char *s,
1061                                 Py_ssize_t len,
1062                                 const char *errors,
1063                                 int *first_invalid_escape_char,
1064                                 const char **first_invalid_escape_ptr)
1065 {
1066     int c;
1067     char *p;
1068     const char *end;
1069     _PyBytesWriter writer;
1070 
1071     _PyBytesWriter_Init(&writer);
1072 
1073     p = _PyBytesWriter_Alloc(&writer, len);
1074     if (p == NULL)
1075         return NULL;
1076     writer.overallocate = 1;
1077 
1078     *first_invalid_escape_char = -1;
1079     *first_invalid_escape_ptr = NULL;
1080 
1081     end = s + len;
1082     while (s < end) {
1083         if (*s != '\\') {
1084             *p++ = *s++;
1085             continue;
1086         }
1087 
1088         s++;
1089         if (s == end) {
1090             PyErr_SetString(PyExc_ValueError,
1091                             "Trailing \\ in string");
1092             goto failed;
1093         }
1094 
1095         switch (*s++) {
1096         /* XXX This assumes ASCII! */
1097         case '\n': break;
1098         case '\\': *p++ = '\\'; break;
1099         case '\'': *p++ = '\''; break;
1100         case '\"': *p++ = '\"'; break;
1101         case 'b': *p++ = '\b'; break;
1102         case 'f': *p++ = '\014'; break; /* FF */
1103         case 't': *p++ = '\t'; break;
1104         case 'n': *p++ = '\n'; break;
1105         case 'r': *p++ = '\r'; break;
1106         case 'v': *p++ = '\013'; break; /* VT */
1107         case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1108         case '0': case '1': case '2': case '3':
1109         case '4': case '5': case '6': case '7':
1110             c = s[-1] - '0';
1111             if (s < end && '0' <= *s && *s <= '7') {
1112                 c = (c<<3) + *s++ - '0';
1113                 if (s < end && '0' <= *s && *s <= '7')
1114                     c = (c<<3) + *s++ - '0';
1115             }
1116             if (c > 0377) {
1117                 if (*first_invalid_escape_char == -1) {
1118                     *first_invalid_escape_char = c;
1119                     /* Back up 3 chars, since we've already incremented s. */
1120                     *first_invalid_escape_ptr = s - 3;
1121                 }
1122             }
1123             *p++ = c;
1124             break;
1125         case 'x':
1126             if (s+1 < end) {
1127                 int digit1, digit2;
1128                 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1129                 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1130                 if (digit1 < 16 && digit2 < 16) {
1131                     *p++ = (unsigned char)((digit1 << 4) + digit2);
1132                     s += 2;
1133                     break;
1134                 }
1135             }
1136             /* invalid hexadecimal digits */
1137 
1138             if (!errors || strcmp(errors, "strict") == 0) {
1139                 PyErr_Format(PyExc_ValueError,
1140                              "invalid \\x escape at position %zd",
1141                              s - 2 - (end - len));
1142                 goto failed;
1143             }
1144             if (strcmp(errors, "replace") == 0) {
1145                 *p++ = '?';
1146             } else if (strcmp(errors, "ignore") == 0)
1147                 /* do nothing */;
1148             else {
1149                 PyErr_Format(PyExc_ValueError,
1150                              "decoding error; unknown "
1151                              "error handling code: %.400s",
1152                              errors);
1153                 goto failed;
1154             }
1155             /* skip \x */
1156             if (s < end && Py_ISXDIGIT(s[0]))
1157                 s++; /* and a hexdigit */
1158             break;
1159 
1160         default:
1161             if (*first_invalid_escape_char == -1) {
1162                 *first_invalid_escape_char = (unsigned char)s[-1];
1163                 /* Back up one char, since we've already incremented s. */
1164                 *first_invalid_escape_ptr = s - 1;
1165             }
1166             *p++ = '\\';
1167             s--;
1168         }
1169     }
1170 
1171     return _PyBytesWriter_Finish(&writer, p);
1172 
1173   failed:
1174     _PyBytesWriter_Dealloc(&writer);
1175     return NULL;
1176 }
1177 
1178 // Export for binary compatibility.
_PyBytes_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,const char ** first_invalid_escape)1179 PyObject *_PyBytes_DecodeEscape(const char *s,
1180                                 Py_ssize_t len,
1181                                 const char *errors,
1182                                 const char **first_invalid_escape)
1183 {
1184     int first_invalid_escape_char;
1185     return _PyBytes_DecodeEscape2(
1186             s, len, errors,
1187             &first_invalid_escape_char,
1188             first_invalid_escape);
1189 }
1190 
PyBytes_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,Py_ssize_t Py_UNUSED (unicode),const char * Py_UNUSED (recode_encoding))1191 PyObject *PyBytes_DecodeEscape(const char *s,
1192                                 Py_ssize_t len,
1193                                 const char *errors,
1194                                 Py_ssize_t Py_UNUSED(unicode),
1195                                 const char *Py_UNUSED(recode_encoding))
1196 {
1197     int first_invalid_escape_char;
1198     const char *first_invalid_escape_ptr;
1199     PyObject *result = _PyBytes_DecodeEscape2(s, len, errors,
1200                                              &first_invalid_escape_char,
1201                                              &first_invalid_escape_ptr);
1202     if (result == NULL)
1203         return NULL;
1204     if (first_invalid_escape_char != -1) {
1205         if (first_invalid_escape_char > 0xff) {
1206             char buf[12] = "";
1207             snprintf(buf, sizeof buf, "%o", first_invalid_escape_char);
1208             if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1209                                  "invalid octal escape sequence '\\%s'",
1210                                  buf) < 0)
1211             {
1212                 Py_DECREF(result);
1213                 return NULL;
1214             }
1215         }
1216         else {
1217             if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1218                                  "invalid escape sequence '\\%c'",
1219                                  first_invalid_escape_char) < 0)
1220             {
1221                 Py_DECREF(result);
1222                 return NULL;
1223             }
1224         }
1225     }
1226     return result;
1227 
1228 }
1229 /* -------------------------------------------------------------------- */
1230 /* object api */
1231 
1232 Py_ssize_t
PyBytes_Size(PyObject * op)1233 PyBytes_Size(PyObject *op)
1234 {
1235     if (!PyBytes_Check(op)) {
1236         PyErr_Format(PyExc_TypeError,
1237              "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1238         return -1;
1239     }
1240     return Py_SIZE(op);
1241 }
1242 
1243 char *
PyBytes_AsString(PyObject * op)1244 PyBytes_AsString(PyObject *op)
1245 {
1246     if (!PyBytes_Check(op)) {
1247         PyErr_Format(PyExc_TypeError,
1248              "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1249         return NULL;
1250     }
1251     return ((PyBytesObject *)op)->ob_sval;
1252 }
1253 
1254 int
PyBytes_AsStringAndSize(PyObject * obj,char ** s,Py_ssize_t * len)1255 PyBytes_AsStringAndSize(PyObject *obj,
1256                          char **s,
1257                          Py_ssize_t *len)
1258 {
1259     if (s == NULL) {
1260         PyErr_BadInternalCall();
1261         return -1;
1262     }
1263 
1264     if (!PyBytes_Check(obj)) {
1265         PyErr_Format(PyExc_TypeError,
1266              "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1267         return -1;
1268     }
1269 
1270     *s = PyBytes_AS_STRING(obj);
1271     if (len != NULL)
1272         *len = PyBytes_GET_SIZE(obj);
1273     else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1274         PyErr_SetString(PyExc_ValueError,
1275                         "embedded null byte");
1276         return -1;
1277     }
1278     return 0;
1279 }
1280 
1281 /* -------------------------------------------------------------------- */
1282 /* Methods */
1283 
1284 #define STRINGLIB_GET_EMPTY() bytes_get_empty()
1285 
1286 #include "stringlib/stringdefs.h"
1287 #define STRINGLIB_MUTABLE 0
1288 
1289 #include "stringlib/fastsearch.h"
1290 #include "stringlib/count.h"
1291 #include "stringlib/find.h"
1292 #include "stringlib/join.h"
1293 #include "stringlib/partition.h"
1294 #include "stringlib/split.h"
1295 #include "stringlib/ctype.h"
1296 
1297 #include "stringlib/transmogrify.h"
1298 
1299 #undef STRINGLIB_GET_EMPTY
1300 
1301 Py_ssize_t
_PyBytes_Find(const char * haystack,Py_ssize_t len_haystack,const char * needle,Py_ssize_t len_needle,Py_ssize_t offset)1302 _PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
1303               const char *needle, Py_ssize_t len_needle,
1304               Py_ssize_t offset)
1305 {
1306     return stringlib_find(haystack, len_haystack,
1307                           needle, len_needle, offset);
1308 }
1309 
1310 Py_ssize_t
_PyBytes_ReverseFind(const char * haystack,Py_ssize_t len_haystack,const char * needle,Py_ssize_t len_needle,Py_ssize_t offset)1311 _PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
1312                      const char *needle, Py_ssize_t len_needle,
1313                      Py_ssize_t offset)
1314 {
1315     return stringlib_rfind(haystack, len_haystack,
1316                            needle, len_needle, offset);
1317 }
1318 
1319 PyObject *
PyBytes_Repr(PyObject * obj,int smartquotes)1320 PyBytes_Repr(PyObject *obj, int smartquotes)
1321 {
1322     PyBytesObject* op = (PyBytesObject*) obj;
1323     Py_ssize_t i, length = Py_SIZE(op);
1324     Py_ssize_t newsize, squotes, dquotes;
1325     PyObject *v;
1326     unsigned char quote;
1327     const unsigned char *s;
1328     Py_UCS1 *p;
1329 
1330     /* Compute size of output string */
1331     squotes = dquotes = 0;
1332     newsize = 3; /* b'' */
1333     s = (const unsigned char*)op->ob_sval;
1334     for (i = 0; i < length; i++) {
1335         Py_ssize_t incr = 1;
1336         switch(s[i]) {
1337         case '\'': squotes++; break;
1338         case '"':  dquotes++; break;
1339         case '\\': case '\t': case '\n': case '\r':
1340             incr = 2; break; /* \C */
1341         default:
1342             if (s[i] < ' ' || s[i] >= 0x7f)
1343                 incr = 4; /* \xHH */
1344         }
1345         if (newsize > PY_SSIZE_T_MAX - incr)
1346             goto overflow;
1347         newsize += incr;
1348     }
1349     quote = '\'';
1350     if (smartquotes && squotes && !dquotes)
1351         quote = '"';
1352     if (squotes && quote == '\'') {
1353         if (newsize > PY_SSIZE_T_MAX - squotes)
1354             goto overflow;
1355         newsize += squotes;
1356     }
1357 
1358     v = PyUnicode_New(newsize, 127);
1359     if (v == NULL) {
1360         return NULL;
1361     }
1362     p = PyUnicode_1BYTE_DATA(v);
1363 
1364     *p++ = 'b', *p++ = quote;
1365     for (i = 0; i < length; i++) {
1366         unsigned char c = op->ob_sval[i];
1367         if (c == quote || c == '\\')
1368             *p++ = '\\', *p++ = c;
1369         else if (c == '\t')
1370             *p++ = '\\', *p++ = 't';
1371         else if (c == '\n')
1372             *p++ = '\\', *p++ = 'n';
1373         else if (c == '\r')
1374             *p++ = '\\', *p++ = 'r';
1375         else if (c < ' ' || c >= 0x7f) {
1376             *p++ = '\\';
1377             *p++ = 'x';
1378             *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1379             *p++ = Py_hexdigits[c & 0xf];
1380         }
1381         else
1382             *p++ = c;
1383     }
1384     *p++ = quote;
1385     assert(_PyUnicode_CheckConsistency(v, 1));
1386     return v;
1387 
1388   overflow:
1389     PyErr_SetString(PyExc_OverflowError,
1390                     "bytes object is too large to make repr");
1391     return NULL;
1392 }
1393 
1394 static PyObject *
bytes_repr(PyObject * op)1395 bytes_repr(PyObject *op)
1396 {
1397     return PyBytes_Repr(op, 1);
1398 }
1399 
1400 static PyObject *
bytes_str(PyObject * op)1401 bytes_str(PyObject *op)
1402 {
1403     if (_Py_GetConfig()->bytes_warning) {
1404         if (PyErr_WarnEx(PyExc_BytesWarning,
1405                          "str() on a bytes instance", 1)) {
1406             return NULL;
1407         }
1408     }
1409     return bytes_repr(op);
1410 }
1411 
1412 static Py_ssize_t
bytes_length(PyBytesObject * a)1413 bytes_length(PyBytesObject *a)
1414 {
1415     return Py_SIZE(a);
1416 }
1417 
1418 /* This is also used by PyBytes_Concat() */
1419 static PyObject *
bytes_concat(PyObject * a,PyObject * b)1420 bytes_concat(PyObject *a, PyObject *b)
1421 {
1422     Py_buffer va, vb;
1423     PyObject *result = NULL;
1424 
1425     va.len = -1;
1426     vb.len = -1;
1427     if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1428         PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1429         PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1430                      Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
1431         goto done;
1432     }
1433 
1434     /* Optimize end cases */
1435     if (va.len == 0 && PyBytes_CheckExact(b)) {
1436         result = b;
1437         Py_INCREF(result);
1438         goto done;
1439     }
1440     if (vb.len == 0 && PyBytes_CheckExact(a)) {
1441         result = a;
1442         Py_INCREF(result);
1443         goto done;
1444     }
1445 
1446     if (va.len > PY_SSIZE_T_MAX - vb.len) {
1447         PyErr_NoMemory();
1448         goto done;
1449     }
1450 
1451     result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1452     if (result != NULL) {
1453         memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1454         memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1455     }
1456 
1457   done:
1458     if (va.len != -1)
1459         PyBuffer_Release(&va);
1460     if (vb.len != -1)
1461         PyBuffer_Release(&vb);
1462     return result;
1463 }
1464 
1465 static PyObject *
bytes_repeat(PyBytesObject * a,Py_ssize_t n)1466 bytes_repeat(PyBytesObject *a, Py_ssize_t n)
1467 {
1468     Py_ssize_t size;
1469     PyBytesObject *op;
1470     size_t nbytes;
1471     if (n < 0)
1472         n = 0;
1473     /* watch out for overflows:  the size can overflow int,
1474      * and the # of bytes needed can overflow size_t
1475      */
1476     if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1477         PyErr_SetString(PyExc_OverflowError,
1478             "repeated bytes are too long");
1479         return NULL;
1480     }
1481     size = Py_SIZE(a) * n;
1482     if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1483         Py_INCREF(a);
1484         return (PyObject *)a;
1485     }
1486     nbytes = (size_t)size;
1487     if (nbytes + PyBytesObject_SIZE <= nbytes) {
1488         PyErr_SetString(PyExc_OverflowError,
1489             "repeated bytes are too long");
1490         return NULL;
1491     }
1492     op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + nbytes);
1493     if (op == NULL) {
1494         return PyErr_NoMemory();
1495     }
1496     _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
1497 _Py_COMP_DIAG_PUSH
1498 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
1499     op->ob_shash = -1;
1500 _Py_COMP_DIAG_POP
1501     op->ob_sval[size] = '\0';
1502 
1503     _PyBytes_Repeat(op->ob_sval, size, a->ob_sval, Py_SIZE(a));
1504 
1505     return (PyObject *) op;
1506 }
1507 
1508 static int
bytes_contains(PyObject * self,PyObject * arg)1509 bytes_contains(PyObject *self, PyObject *arg)
1510 {
1511     return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1512 }
1513 
1514 static PyObject *
bytes_item(PyBytesObject * a,Py_ssize_t i)1515 bytes_item(PyBytesObject *a, Py_ssize_t i)
1516 {
1517     if (i < 0 || i >= Py_SIZE(a)) {
1518         PyErr_SetString(PyExc_IndexError, "index out of range");
1519         return NULL;
1520     }
1521     return _PyLong_FromUnsignedChar((unsigned char)a->ob_sval[i]);
1522 }
1523 
1524 static int
bytes_compare_eq(PyBytesObject * a,PyBytesObject * b)1525 bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1526 {
1527     int cmp;
1528     Py_ssize_t len;
1529 
1530     len = Py_SIZE(a);
1531     if (Py_SIZE(b) != len)
1532         return 0;
1533 
1534     if (a->ob_sval[0] != b->ob_sval[0])
1535         return 0;
1536 
1537     cmp = memcmp(a->ob_sval, b->ob_sval, len);
1538     return (cmp == 0);
1539 }
1540 
1541 static PyObject*
bytes_richcompare(PyBytesObject * a,PyBytesObject * b,int op)1542 bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
1543 {
1544     int c;
1545     Py_ssize_t len_a, len_b;
1546     Py_ssize_t min_len;
1547 
1548     /* Make sure both arguments are strings. */
1549     if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1550         if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
1551             if (PyUnicode_Check(a) || PyUnicode_Check(b)) {
1552                 if (PyErr_WarnEx(PyExc_BytesWarning,
1553                                  "Comparison between bytes and string", 1))
1554                     return NULL;
1555             }
1556             if (PyLong_Check(a) || PyLong_Check(b)) {
1557                 if (PyErr_WarnEx(PyExc_BytesWarning,
1558                                  "Comparison between bytes and int", 1))
1559                     return NULL;
1560             }
1561         }
1562         Py_RETURN_NOTIMPLEMENTED;
1563     }
1564     else if (a == b) {
1565         switch (op) {
1566         case Py_EQ:
1567         case Py_LE:
1568         case Py_GE:
1569             /* a byte string is equal to itself */
1570             Py_RETURN_TRUE;
1571         case Py_NE:
1572         case Py_LT:
1573         case Py_GT:
1574             Py_RETURN_FALSE;
1575         default:
1576             PyErr_BadArgument();
1577             return NULL;
1578         }
1579     }
1580     else if (op == Py_EQ || op == Py_NE) {
1581         int eq = bytes_compare_eq(a, b);
1582         eq ^= (op == Py_NE);
1583         return PyBool_FromLong(eq);
1584     }
1585     else {
1586         len_a = Py_SIZE(a);
1587         len_b = Py_SIZE(b);
1588         min_len = Py_MIN(len_a, len_b);
1589         if (min_len > 0) {
1590             c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1591             if (c == 0)
1592                 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1593         }
1594         else
1595             c = 0;
1596         if (c != 0)
1597             Py_RETURN_RICHCOMPARE(c, 0, op);
1598         Py_RETURN_RICHCOMPARE(len_a, len_b, op);
1599     }
1600 }
1601 
1602 static Py_hash_t
bytes_hash(PyBytesObject * a)1603 bytes_hash(PyBytesObject *a)
1604 {
1605 _Py_COMP_DIAG_PUSH
1606 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
1607     if (a->ob_shash == -1) {
1608         /* Can't fail */
1609         a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
1610     }
1611     return a->ob_shash;
1612 _Py_COMP_DIAG_POP
1613 }
1614 
1615 static PyObject*
bytes_subscript(PyBytesObject * self,PyObject * item)1616 bytes_subscript(PyBytesObject* self, PyObject* item)
1617 {
1618     if (_PyIndex_Check(item)) {
1619         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1620         if (i == -1 && PyErr_Occurred())
1621             return NULL;
1622         if (i < 0)
1623             i += PyBytes_GET_SIZE(self);
1624         if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1625             PyErr_SetString(PyExc_IndexError,
1626                             "index out of range");
1627             return NULL;
1628         }
1629         return _PyLong_FromUnsignedChar((unsigned char)self->ob_sval[i]);
1630     }
1631     else if (PySlice_Check(item)) {
1632         Py_ssize_t start, stop, step, slicelength, i;
1633         size_t cur;
1634         const char* source_buf;
1635         char* result_buf;
1636         PyObject* result;
1637 
1638         if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1639             return NULL;
1640         }
1641         slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1642                                             &stop, step);
1643 
1644         if (slicelength <= 0) {
1645             return PyBytes_FromStringAndSize("", 0);
1646         }
1647         else if (start == 0 && step == 1 &&
1648                  slicelength == PyBytes_GET_SIZE(self) &&
1649                  PyBytes_CheckExact(self)) {
1650             Py_INCREF(self);
1651             return (PyObject *)self;
1652         }
1653         else if (step == 1) {
1654             return PyBytes_FromStringAndSize(
1655                 PyBytes_AS_STRING(self) + start,
1656                 slicelength);
1657         }
1658         else {
1659             source_buf = PyBytes_AS_STRING(self);
1660             result = PyBytes_FromStringAndSize(NULL, slicelength);
1661             if (result == NULL)
1662                 return NULL;
1663 
1664             result_buf = PyBytes_AS_STRING(result);
1665             for (cur = start, i = 0; i < slicelength;
1666                  cur += step, i++) {
1667                 result_buf[i] = source_buf[cur];
1668             }
1669 
1670             return result;
1671         }
1672     }
1673     else {
1674         PyErr_Format(PyExc_TypeError,
1675                      "byte indices must be integers or slices, not %.200s",
1676                      Py_TYPE(item)->tp_name);
1677         return NULL;
1678     }
1679 }
1680 
1681 static int
bytes_buffer_getbuffer(PyBytesObject * self,Py_buffer * view,int flags)1682 bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
1683 {
1684     return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1685                              1, flags);
1686 }
1687 
1688 static PySequenceMethods bytes_as_sequence = {
1689     (lenfunc)bytes_length, /*sq_length*/
1690     (binaryfunc)bytes_concat, /*sq_concat*/
1691     (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1692     (ssizeargfunc)bytes_item, /*sq_item*/
1693     0,                  /*sq_slice*/
1694     0,                  /*sq_ass_item*/
1695     0,                  /*sq_ass_slice*/
1696     (objobjproc)bytes_contains /*sq_contains*/
1697 };
1698 
1699 static PyMappingMethods bytes_as_mapping = {
1700     (lenfunc)bytes_length,
1701     (binaryfunc)bytes_subscript,
1702     0,
1703 };
1704 
1705 static PyBufferProcs bytes_as_buffer = {
1706     (getbufferproc)bytes_buffer_getbuffer,
1707     NULL,
1708 };
1709 
1710 
1711 /*[clinic input]
1712 bytes.__bytes__
1713 Convert this value to exact type bytes.
1714 [clinic start generated code]*/
1715 
1716 static PyObject *
bytes___bytes___impl(PyBytesObject * self)1717 bytes___bytes___impl(PyBytesObject *self)
1718 /*[clinic end generated code: output=63a306a9bc0caac5 input=34ec5ddba98bd6bb]*/
1719 {
1720     if (PyBytes_CheckExact(self)) {
1721         Py_INCREF(self);
1722         return (PyObject *)self;
1723     }
1724     else {
1725         return PyBytes_FromStringAndSize(self->ob_sval, Py_SIZE(self));
1726     }
1727 }
1728 
1729 
1730 #define LEFTSTRIP 0
1731 #define RIGHTSTRIP 1
1732 #define BOTHSTRIP 2
1733 
1734 /*[clinic input]
1735 bytes.split
1736 
1737     sep: object = None
1738         The delimiter according which to split the bytes.
1739         None (the default value) means split on ASCII whitespace characters
1740         (space, tab, return, newline, formfeed, vertical tab).
1741     maxsplit: Py_ssize_t = -1
1742         Maximum number of splits to do.
1743         -1 (the default value) means no limit.
1744 
1745 Return a list of the sections in the bytes, using sep as the delimiter.
1746 [clinic start generated code]*/
1747 
1748 static PyObject *
bytes_split_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1749 bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1750 /*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1751 {
1752     Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1753     const char *s = PyBytes_AS_STRING(self), *sub;
1754     Py_buffer vsub;
1755     PyObject *list;
1756 
1757     if (maxsplit < 0)
1758         maxsplit = PY_SSIZE_T_MAX;
1759     if (sep == Py_None)
1760         return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1761     if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1762         return NULL;
1763     sub = vsub.buf;
1764     n = vsub.len;
1765 
1766     list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1767     PyBuffer_Release(&vsub);
1768     return list;
1769 }
1770 
1771 /*[clinic input]
1772 bytes.partition
1773 
1774     sep: Py_buffer
1775     /
1776 
1777 Partition the bytes into three parts using the given separator.
1778 
1779 This will search for the separator sep in the bytes. If the separator is found,
1780 returns a 3-tuple containing the part before the separator, the separator
1781 itself, and the part after it.
1782 
1783 If the separator is not found, returns a 3-tuple containing the original bytes
1784 object and two empty bytes objects.
1785 [clinic start generated code]*/
1786 
1787 static PyObject *
bytes_partition_impl(PyBytesObject * self,Py_buffer * sep)1788 bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1789 /*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
1790 {
1791     return stringlib_partition(
1792         (PyObject*) self,
1793         PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1794         sep->obj, (const char *)sep->buf, sep->len
1795         );
1796 }
1797 
1798 /*[clinic input]
1799 bytes.rpartition
1800 
1801     sep: Py_buffer
1802     /
1803 
1804 Partition the bytes into three parts using the given separator.
1805 
1806 This will search for the separator sep in the bytes, starting at the end. If
1807 the separator is found, returns a 3-tuple containing the part before the
1808 separator, the separator itself, and the part after it.
1809 
1810 If the separator is not found, returns a 3-tuple containing two empty bytes
1811 objects and the original bytes object.
1812 [clinic start generated code]*/
1813 
1814 static PyObject *
bytes_rpartition_impl(PyBytesObject * self,Py_buffer * sep)1815 bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1816 /*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
1817 {
1818     return stringlib_rpartition(
1819         (PyObject*) self,
1820         PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1821         sep->obj, (const char *)sep->buf, sep->len
1822         );
1823 }
1824 
1825 /*[clinic input]
1826 bytes.rsplit = bytes.split
1827 
1828 Return a list of the sections in the bytes, using sep as the delimiter.
1829 
1830 Splitting is done starting at the end of the bytes and working to the front.
1831 [clinic start generated code]*/
1832 
1833 static PyObject *
bytes_rsplit_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1834 bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1835 /*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
1836 {
1837     Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1838     const char *s = PyBytes_AS_STRING(self), *sub;
1839     Py_buffer vsub;
1840     PyObject *list;
1841 
1842     if (maxsplit < 0)
1843         maxsplit = PY_SSIZE_T_MAX;
1844     if (sep == Py_None)
1845         return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1846     if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1847         return NULL;
1848     sub = vsub.buf;
1849     n = vsub.len;
1850 
1851     list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1852     PyBuffer_Release(&vsub);
1853     return list;
1854 }
1855 
1856 
1857 /*[clinic input]
1858 bytes.join
1859 
1860     iterable_of_bytes: object
1861     /
1862 
1863 Concatenate any number of bytes objects.
1864 
1865 The bytes whose method is called is inserted in between each pair.
1866 
1867 The result is returned as a new bytes object.
1868 
1869 Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1870 [clinic start generated code]*/
1871 
1872 static PyObject *
bytes_join(PyBytesObject * self,PyObject * iterable_of_bytes)1873 bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1874 /*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
1875 {
1876     return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1877 }
1878 
1879 PyObject *
_PyBytes_Join(PyObject * sep,PyObject * x)1880 _PyBytes_Join(PyObject *sep, PyObject *x)
1881 {
1882     assert(sep != NULL && PyBytes_Check(sep));
1883     assert(x != NULL);
1884     return bytes_join((PyBytesObject*)sep, x);
1885 }
1886 
1887 static PyObject *
bytes_find(PyBytesObject * self,PyObject * args)1888 bytes_find(PyBytesObject *self, PyObject *args)
1889 {
1890     return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1891 }
1892 
1893 static PyObject *
bytes_index(PyBytesObject * self,PyObject * args)1894 bytes_index(PyBytesObject *self, PyObject *args)
1895 {
1896     return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1897 }
1898 
1899 
1900 static PyObject *
bytes_rfind(PyBytesObject * self,PyObject * args)1901 bytes_rfind(PyBytesObject *self, PyObject *args)
1902 {
1903     return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1904 }
1905 
1906 
1907 static PyObject *
bytes_rindex(PyBytesObject * self,PyObject * args)1908 bytes_rindex(PyBytesObject *self, PyObject *args)
1909 {
1910     return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1911 }
1912 
1913 
1914 Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject * self,int striptype,PyObject * sepobj)1915 do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
1916 {
1917     Py_buffer vsep;
1918     const char *s = PyBytes_AS_STRING(self);
1919     Py_ssize_t len = PyBytes_GET_SIZE(self);
1920     char *sep;
1921     Py_ssize_t seplen;
1922     Py_ssize_t i, j;
1923 
1924     if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
1925         return NULL;
1926     sep = vsep.buf;
1927     seplen = vsep.len;
1928 
1929     i = 0;
1930     if (striptype != RIGHTSTRIP) {
1931         while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1932             i++;
1933         }
1934     }
1935 
1936     j = len;
1937     if (striptype != LEFTSTRIP) {
1938         do {
1939             j--;
1940         } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1941         j++;
1942     }
1943 
1944     PyBuffer_Release(&vsep);
1945 
1946     if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1947         Py_INCREF(self);
1948         return (PyObject*)self;
1949     }
1950     else
1951         return PyBytes_FromStringAndSize(s+i, j-i);
1952 }
1953 
1954 
1955 Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject * self,int striptype)1956 do_strip(PyBytesObject *self, int striptype)
1957 {
1958     const char *s = PyBytes_AS_STRING(self);
1959     Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1960 
1961     i = 0;
1962     if (striptype != RIGHTSTRIP) {
1963         while (i < len && Py_ISSPACE(s[i])) {
1964             i++;
1965         }
1966     }
1967 
1968     j = len;
1969     if (striptype != LEFTSTRIP) {
1970         do {
1971             j--;
1972         } while (j >= i && Py_ISSPACE(s[j]));
1973         j++;
1974     }
1975 
1976     if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1977         Py_INCREF(self);
1978         return (PyObject*)self;
1979     }
1980     else
1981         return PyBytes_FromStringAndSize(s+i, j-i);
1982 }
1983 
1984 
1985 Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject * self,int striptype,PyObject * bytes)1986 do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
1987 {
1988     if (bytes != Py_None) {
1989         return do_xstrip(self, striptype, bytes);
1990     }
1991     return do_strip(self, striptype);
1992 }
1993 
1994 /*[clinic input]
1995 bytes.strip
1996 
1997     bytes: object = None
1998     /
1999 
2000 Strip leading and trailing bytes contained in the argument.
2001 
2002 If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2003 [clinic start generated code]*/
2004 
2005 static PyObject *
bytes_strip_impl(PyBytesObject * self,PyObject * bytes)2006 bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
2007 /*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
2008 {
2009     return do_argstrip(self, BOTHSTRIP, bytes);
2010 }
2011 
2012 /*[clinic input]
2013 bytes.lstrip
2014 
2015     bytes: object = None
2016     /
2017 
2018 Strip leading bytes contained in the argument.
2019 
If the argument is omitted or None, strip leading ASCII whitespace.
2021 [clinic start generated code]*/
2022 
2023 static PyObject *
bytes_lstrip_impl(PyBytesObject * self,PyObject * bytes)2024 bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
2025 /*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
2026 {
2027     return do_argstrip(self, LEFTSTRIP, bytes);
2028 }
2029 
2030 /*[clinic input]
2031 bytes.rstrip
2032 
2033     bytes: object = None
2034     /
2035 
2036 Strip trailing bytes contained in the argument.
2037 
2038 If the argument is omitted or None, strip trailing ASCII whitespace.
2039 [clinic start generated code]*/
2040 
2041 static PyObject *
bytes_rstrip_impl(PyBytesObject * self,PyObject * bytes)2042 bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2043 /*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2044 {
2045     return do_argstrip(self, RIGHTSTRIP, bytes);
2046 }
2047 
2048 
2049 static PyObject *
bytes_count(PyBytesObject * self,PyObject * args)2050 bytes_count(PyBytesObject *self, PyObject *args)
2051 {
2052     return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2053 }
2054 
2055 
2056 /*[clinic input]
2057 bytes.translate
2058 
2059     table: object
2060         Translation table, which must be a bytes object of length 256.
2061     /
2062     delete as deletechars: object(c_default="NULL") = b''
2063 
2064 Return a copy with each character mapped by the given translation table.
2065 
2066 All characters occurring in the optional argument delete are removed.
2067 The remaining characters are mapped through the given translation table.
2068 [clinic start generated code]*/
2069 
2070 static PyObject *
bytes_translate_impl(PyBytesObject * self,PyObject * table,PyObject * deletechars)2071 bytes_translate_impl(PyBytesObject *self, PyObject *table,
2072                      PyObject *deletechars)
2073 /*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
2074 {
2075     const char *input;
2076     char *output;
2077     Py_buffer table_view = {NULL, NULL};
2078     Py_buffer del_table_view = {NULL, NULL};
2079     const char *table_chars;
2080     Py_ssize_t i, c, changed = 0;
2081     PyObject *input_obj = (PyObject*)self;
2082     const char *output_start, *del_table_chars=NULL;
2083     Py_ssize_t inlen, tablen, dellen = 0;
2084     PyObject *result;
2085     int trans_table[256];
2086 
2087     if (PyBytes_Check(table)) {
2088         table_chars = PyBytes_AS_STRING(table);
2089         tablen = PyBytes_GET_SIZE(table);
2090     }
2091     else if (table == Py_None) {
2092         table_chars = NULL;
2093         tablen = 256;
2094     }
2095     else {
2096         if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
2097             return NULL;
2098         table_chars = table_view.buf;
2099         tablen = table_view.len;
2100     }
2101 
2102     if (tablen != 256) {
2103         PyErr_SetString(PyExc_ValueError,
2104           "translation table must be 256 characters long");
2105         PyBuffer_Release(&table_view);
2106         return NULL;
2107     }
2108 
2109     if (deletechars != NULL) {
2110         if (PyBytes_Check(deletechars)) {
2111             del_table_chars = PyBytes_AS_STRING(deletechars);
2112             dellen = PyBytes_GET_SIZE(deletechars);
2113         }
2114         else {
2115             if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
2116                 PyBuffer_Release(&table_view);
2117                 return NULL;
2118             }
2119             del_table_chars = del_table_view.buf;
2120             dellen = del_table_view.len;
2121         }
2122     }
2123     else {
2124         del_table_chars = NULL;
2125         dellen = 0;
2126     }
2127 
2128     inlen = PyBytes_GET_SIZE(input_obj);
2129     result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2130     if (result == NULL) {
2131         PyBuffer_Release(&del_table_view);
2132         PyBuffer_Release(&table_view);
2133         return NULL;
2134     }
2135     output_start = output = PyBytes_AS_STRING(result);
2136     input = PyBytes_AS_STRING(input_obj);
2137 
2138     if (dellen == 0 && table_chars != NULL) {
2139         /* If no deletions are required, use faster code */
2140         for (i = inlen; --i >= 0; ) {
2141             c = Py_CHARMASK(*input++);
2142             if (Py_CHARMASK((*output++ = table_chars[c])) != c)
2143                 changed = 1;
2144         }
2145         if (!changed && PyBytes_CheckExact(input_obj)) {
2146             Py_INCREF(input_obj);
2147             Py_DECREF(result);
2148             result = input_obj;
2149         }
2150         PyBuffer_Release(&del_table_view);
2151         PyBuffer_Release(&table_view);
2152         return result;
2153     }
2154 
2155     if (table_chars == NULL) {
2156         for (i = 0; i < 256; i++)
2157             trans_table[i] = Py_CHARMASK(i);
2158     } else {
2159         for (i = 0; i < 256; i++)
2160             trans_table[i] = Py_CHARMASK(table_chars[i]);
2161     }
2162     PyBuffer_Release(&table_view);
2163 
2164     for (i = 0; i < dellen; i++)
2165         trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
2166     PyBuffer_Release(&del_table_view);
2167 
2168     for (i = inlen; --i >= 0; ) {
2169         c = Py_CHARMASK(*input++);
2170         if (trans_table[c] != -1)
2171             if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2172                 continue;
2173         changed = 1;
2174     }
2175     if (!changed && PyBytes_CheckExact(input_obj)) {
2176         Py_DECREF(result);
2177         Py_INCREF(input_obj);
2178         return input_obj;
2179     }
2180     /* Fix the size of the resulting byte string */
2181     if (inlen > 0)
2182         _PyBytes_Resize(&result, output - output_start);
2183     return result;
2184 }
2185 
2186 
2187 /*[clinic input]
2188 
2189 @staticmethod
2190 bytes.maketrans
2191 
2192     frm: Py_buffer
2193     to: Py_buffer
2194     /
2195 
2196 Return a translation table useable for the bytes or bytearray translate method.
2197 
2198 The returned table will be one where each byte in frm is mapped to the byte at
2199 the same position in to.
2200 
2201 The bytes objects frm and to must be of the same length.
2202 [clinic start generated code]*/
2203 
2204 static PyObject *
bytes_maketrans_impl(Py_buffer * frm,Py_buffer * to)2205 bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2206 /*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
2207 {
2208     return _Py_bytes_maketrans(frm, to);
2209 }
2210 
2211 
2212 /*[clinic input]
2213 bytes.replace
2214 
2215     old: Py_buffer
2216     new: Py_buffer
2217     count: Py_ssize_t = -1
2218         Maximum number of occurrences to replace.
2219         -1 (the default value) means replace all occurrences.
2220     /
2221 
2222 Return a copy with all occurrences of substring old replaced by new.
2223 
2224 If the optional argument count is given, only the first count occurrences are
2225 replaced.
2226 [clinic start generated code]*/
2227 
2228 static PyObject *
bytes_replace_impl(PyBytesObject * self,Py_buffer * old,Py_buffer * new,Py_ssize_t count)2229 bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2230                    Py_ssize_t count)
2231 /*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
2232 {
2233     return stringlib_replace((PyObject *)self,
2234                              (const char *)old->buf, old->len,
2235                              (const char *)new->buf, new->len, count);
2236 }
2237 
2238 /** End DALKE **/
2239 
2240 /*[clinic input]
2241 bytes.removeprefix as bytes_removeprefix
2242 
2243     prefix: Py_buffer
2244     /
2245 
2246 Return a bytes object with the given prefix string removed if present.
2247 
2248 If the bytes starts with the prefix string, return bytes[len(prefix):].
2249 Otherwise, return a copy of the original bytes.
2250 [clinic start generated code]*/
2251 
2252 static PyObject *
bytes_removeprefix_impl(PyBytesObject * self,Py_buffer * prefix)2253 bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2254 /*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2255 {
2256     const char *self_start = PyBytes_AS_STRING(self);
2257     Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2258     const char *prefix_start = prefix->buf;
2259     Py_ssize_t prefix_len = prefix->len;
2260 
2261     if (self_len >= prefix_len
2262         && prefix_len > 0
2263         && memcmp(self_start, prefix_start, prefix_len) == 0)
2264     {
2265         return PyBytes_FromStringAndSize(self_start + prefix_len,
2266                                          self_len - prefix_len);
2267     }
2268 
2269     if (PyBytes_CheckExact(self)) {
2270         Py_INCREF(self);
2271         return (PyObject *)self;
2272     }
2273 
2274     return PyBytes_FromStringAndSize(self_start, self_len);
2275 }
2276 
2277 /*[clinic input]
2278 bytes.removesuffix as bytes_removesuffix
2279 
2280     suffix: Py_buffer
2281     /
2282 
2283 Return a bytes object with the given suffix string removed if present.
2284 
If the bytes ends with the suffix string and that suffix is not empty,
return bytes[:-len(suffix)].  Otherwise, return a copy of the original
bytes.
2288 [clinic start generated code]*/
2289 
2290 static PyObject *
bytes_removesuffix_impl(PyBytesObject * self,Py_buffer * suffix)2291 bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2292 /*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2293 {
2294     const char *self_start = PyBytes_AS_STRING(self);
2295     Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2296     const char *suffix_start = suffix->buf;
2297     Py_ssize_t suffix_len = suffix->len;
2298 
2299     if (self_len >= suffix_len
2300         && suffix_len > 0
2301         && memcmp(self_start + self_len - suffix_len,
2302                   suffix_start, suffix_len) == 0)
2303     {
2304         return PyBytes_FromStringAndSize(self_start,
2305                                          self_len - suffix_len);
2306     }
2307 
2308     if (PyBytes_CheckExact(self)) {
2309         Py_INCREF(self);
2310         return (PyObject *)self;
2311     }
2312 
2313     return PyBytes_FromStringAndSize(self_start, self_len);
2314 }
2315 
2316 static PyObject *
bytes_startswith(PyBytesObject * self,PyObject * args)2317 bytes_startswith(PyBytesObject *self, PyObject *args)
2318 {
2319     return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2320 }
2321 
2322 static PyObject *
bytes_endswith(PyBytesObject * self,PyObject * args)2323 bytes_endswith(PyBytesObject *self, PyObject *args)
2324 {
2325     return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2326 }
2327 
2328 
2329 /*[clinic input]
2330 bytes.decode
2331 
2332     encoding: str(c_default="NULL") = 'utf-8'
2333         The encoding with which to decode the bytes.
2334     errors: str(c_default="NULL") = 'strict'
2335         The error handling scheme to use for the handling of decoding errors.
2336         The default is 'strict' meaning that decoding errors raise a
2337         UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2338         as well as any other name registered with codecs.register_error that
2339         can handle UnicodeDecodeErrors.
2340 
2341 Decode the bytes using the codec registered for encoding.
2342 [clinic start generated code]*/
2343 
2344 static PyObject *
bytes_decode_impl(PyBytesObject * self,const char * encoding,const char * errors)2345 bytes_decode_impl(PyBytesObject *self, const char *encoding,
2346                   const char *errors)
2347 /*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2348 {
2349     return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2350 }
2351 
2352 
2353 /*[clinic input]
2354 bytes.splitlines
2355 
2356     keepends: bool(accept={int}) = False
2357 
2358 Return a list of the lines in the bytes, breaking at line boundaries.
2359 
2360 Line breaks are not included in the resulting list unless keepends is given and
2361 true.
2362 [clinic start generated code]*/
2363 
2364 static PyObject *
bytes_splitlines_impl(PyBytesObject * self,int keepends)2365 bytes_splitlines_impl(PyBytesObject *self, int keepends)
2366 /*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
2367 {
2368     return stringlib_splitlines(
2369         (PyObject*) self, PyBytes_AS_STRING(self),
2370         PyBytes_GET_SIZE(self), keepends
2371         );
2372 }
2373 
2374 /*[clinic input]
2375 @classmethod
2376 bytes.fromhex
2377 
2378     string: unicode
2379     /
2380 
2381 Create a bytes object from a string of hexadecimal numbers.
2382 
2383 Spaces between two numbers are accepted.
2384 Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2385 [clinic start generated code]*/
2386 
2387 static PyObject *
bytes_fromhex_impl(PyTypeObject * type,PyObject * string)2388 bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2389 /*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
2390 {
2391     PyObject *result = _PyBytes_FromHex(string, 0);
2392     if (type != &PyBytes_Type && result != NULL) {
2393         Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
2394     }
2395     return result;
2396 }
2397 
2398 PyObject*
_PyBytes_FromHex(PyObject * string,int use_bytearray)2399 _PyBytes_FromHex(PyObject *string, int use_bytearray)
2400 {
2401     char *buf;
2402     Py_ssize_t hexlen, invalid_char;
2403     unsigned int top, bot;
2404     const Py_UCS1 *str, *end;
2405     _PyBytesWriter writer;
2406 
2407     _PyBytesWriter_Init(&writer);
2408     writer.use_bytearray = use_bytearray;
2409 
2410     assert(PyUnicode_Check(string));
2411     if (PyUnicode_READY(string))
2412         return NULL;
2413     hexlen = PyUnicode_GET_LENGTH(string);
2414 
2415     if (!PyUnicode_IS_ASCII(string)) {
2416         const void *data = PyUnicode_DATA(string);
2417         unsigned int kind = PyUnicode_KIND(string);
2418         Py_ssize_t i;
2419 
2420         /* search for the first non-ASCII character */
2421         for (i = 0; i < hexlen; i++) {
2422             if (PyUnicode_READ(kind, data, i) >= 128)
2423                 break;
2424         }
2425         invalid_char = i;
2426         goto error;
2427     }
2428 
2429     assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2430     str = PyUnicode_1BYTE_DATA(string);
2431 
2432     /* This overestimates if there are spaces */
2433     buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2434     if (buf == NULL)
2435         return NULL;
2436 
2437     end = str + hexlen;
2438     while (str < end) {
2439         /* skip over spaces in the input */
2440         if (Py_ISSPACE(*str)) {
2441             do {
2442                 str++;
2443             } while (Py_ISSPACE(*str));
2444             if (str >= end)
2445                 break;
2446         }
2447 
2448         top = _PyLong_DigitValue[*str];
2449         if (top >= 16) {
2450             invalid_char = str - PyUnicode_1BYTE_DATA(string);
2451             goto error;
2452         }
2453         str++;
2454 
2455         bot = _PyLong_DigitValue[*str];
2456         if (bot >= 16) {
2457             invalid_char = str - PyUnicode_1BYTE_DATA(string);
2458             goto error;
2459         }
2460         str++;
2461 
2462         *buf++ = (unsigned char)((top << 4) + bot);
2463     }
2464 
2465     return _PyBytesWriter_Finish(&writer, buf);
2466 
2467   error:
2468     PyErr_Format(PyExc_ValueError,
2469                  "non-hexadecimal number found in "
2470                  "fromhex() arg at position %zd", invalid_char);
2471     _PyBytesWriter_Dealloc(&writer);
2472     return NULL;
2473 }
2474 
2475 /*[clinic input]
2476 bytes.hex
2477 
2478     sep: object = NULL
2479         An optional single character or byte to separate hex bytes.
2480     bytes_per_sep: int = 1
2481         How many bytes between separators.  Positive values count from the
2482         right, negative values count from the left.
2483 
2484 Create a string of hexadecimal numbers from a bytes object.
2485 
2486 Example:
2487 >>> value = b'\xb9\x01\xef'
2488 >>> value.hex()
2489 'b901ef'
2490 >>> value.hex(':')
2491 'b9:01:ef'
2492 >>> value.hex(':', 2)
2493 'b9:01ef'
2494 >>> value.hex(':', -2)
2495 'b901:ef'
2496 [clinic start generated code]*/
2497 
2498 static PyObject *
bytes_hex_impl(PyBytesObject * self,PyObject * sep,int bytes_per_sep)2499 bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2500 /*[clinic end generated code: output=1f134da504064139 input=1a21282b1f1ae595]*/
2501 {
2502     const char *argbuf = PyBytes_AS_STRING(self);
2503     Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2504     return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
2505 }
2506 
2507 static PyObject *
bytes_getnewargs(PyBytesObject * v,PyObject * Py_UNUSED (ignored))2508 bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
2509 {
2510     return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
2511 }
2512 
2513 
2514 static PyMethodDef
2515 bytes_methods[] = {
2516     {"__getnewargs__",          (PyCFunction)bytes_getnewargs,  METH_NOARGS},
2517     BYTES___BYTES___METHODDEF
2518     {"capitalize", stringlib_capitalize, METH_NOARGS,
2519      _Py_capitalize__doc__},
2520     STRINGLIB_CENTER_METHODDEF
2521     {"count", (PyCFunction)bytes_count, METH_VARARGS,
2522      _Py_count__doc__},
2523     BYTES_DECODE_METHODDEF
2524     {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2525      _Py_endswith__doc__},
2526     STRINGLIB_EXPANDTABS_METHODDEF
2527     {"find", (PyCFunction)bytes_find, METH_VARARGS,
2528      _Py_find__doc__},
2529     BYTES_FROMHEX_METHODDEF
2530     BYTES_HEX_METHODDEF
2531     {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
2532     {"isalnum", stringlib_isalnum, METH_NOARGS,
2533      _Py_isalnum__doc__},
2534     {"isalpha", stringlib_isalpha, METH_NOARGS,
2535      _Py_isalpha__doc__},
2536     {"isascii", stringlib_isascii, METH_NOARGS,
2537      _Py_isascii__doc__},
2538     {"isdigit", stringlib_isdigit, METH_NOARGS,
2539      _Py_isdigit__doc__},
2540     {"islower", stringlib_islower, METH_NOARGS,
2541      _Py_islower__doc__},
2542     {"isspace", stringlib_isspace, METH_NOARGS,
2543      _Py_isspace__doc__},
2544     {"istitle", stringlib_istitle, METH_NOARGS,
2545      _Py_istitle__doc__},
2546     {"isupper", stringlib_isupper, METH_NOARGS,
2547      _Py_isupper__doc__},
2548     BYTES_JOIN_METHODDEF
2549     STRINGLIB_LJUST_METHODDEF
2550     {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2551     BYTES_LSTRIP_METHODDEF
2552     BYTES_MAKETRANS_METHODDEF
2553     BYTES_PARTITION_METHODDEF
2554     BYTES_REPLACE_METHODDEF
2555     BYTES_REMOVEPREFIX_METHODDEF
2556     BYTES_REMOVESUFFIX_METHODDEF
2557     {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2558     {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
2559     STRINGLIB_RJUST_METHODDEF
2560     BYTES_RPARTITION_METHODDEF
2561     BYTES_RSPLIT_METHODDEF
2562     BYTES_RSTRIP_METHODDEF
2563     BYTES_SPLIT_METHODDEF
2564     BYTES_SPLITLINES_METHODDEF
2565     {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2566      _Py_startswith__doc__},
2567     BYTES_STRIP_METHODDEF
2568     {"swapcase", stringlib_swapcase, METH_NOARGS,
2569      _Py_swapcase__doc__},
2570     {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
2571     BYTES_TRANSLATE_METHODDEF
2572     {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2573     STRINGLIB_ZFILL_METHODDEF
2574     {NULL,     NULL}                         /* sentinel */
2575 };
2576 
2577 static PyObject *
bytes_mod(PyObject * self,PyObject * arg)2578 bytes_mod(PyObject *self, PyObject *arg)
2579 {
2580     if (!PyBytes_Check(self)) {
2581         Py_RETURN_NOTIMPLEMENTED;
2582     }
2583     return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2584                              arg, 0);
2585 }
2586 
2587 static PyNumberMethods bytes_as_number = {
2588     0,              /*nb_add*/
2589     0,              /*nb_subtract*/
2590     0,              /*nb_multiply*/
2591     bytes_mod,      /*nb_remainder*/
2592 };
2593 
2594 static PyObject *
2595 bytes_subtype_new(PyTypeObject *, PyObject *);
2596 
2597 /*[clinic input]
2598 @classmethod
2599 bytes.__new__ as bytes_new
2600 
2601     source as x: object = NULL
2602     encoding: str = NULL
2603     errors: str = NULL
2604 
2605 [clinic start generated code]*/
2606 
2607 static PyObject *
bytes_new_impl(PyTypeObject * type,PyObject * x,const char * encoding,const char * errors)2608 bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
2609                const char *errors)
2610 /*[clinic end generated code: output=1e0c471be311a425 input=f0a966d19b7262b4]*/
2611 {
2612     PyObject *bytes;
2613     PyObject *func;
2614     Py_ssize_t size;
2615 
2616     if (x == NULL) {
2617         if (encoding != NULL || errors != NULL) {
2618             PyErr_SetString(PyExc_TypeError,
2619                             encoding != NULL ?
2620                             "encoding without a string argument" :
2621                             "errors without a string argument");
2622             return NULL;
2623         }
2624         bytes = PyBytes_FromStringAndSize(NULL, 0);
2625     }
2626     else if (encoding != NULL) {
2627         /* Encode via the codec registry */
2628         if (!PyUnicode_Check(x)) {
2629             PyErr_SetString(PyExc_TypeError,
2630                             "encoding without a string argument");
2631             return NULL;
2632         }
2633         bytes = PyUnicode_AsEncodedString(x, encoding, errors);
2634     }
2635     else if (errors != NULL) {
2636         PyErr_SetString(PyExc_TypeError,
2637                         PyUnicode_Check(x) ?
2638                         "string argument without an encoding" :
2639                         "errors without a string argument");
2640         return NULL;
2641     }
2642     /* We'd like to call PyObject_Bytes here, but we need to check for an
2643        integer argument before deferring to PyBytes_FromObject, something
2644        PyObject_Bytes doesn't do. */
2645     else if ((func = _PyObject_LookupSpecial(x, &_Py_ID(__bytes__))) != NULL) {
2646         bytes = _PyObject_CallNoArgs(func);
2647         Py_DECREF(func);
2648         if (bytes == NULL)
2649             return NULL;
2650         if (!PyBytes_Check(bytes)) {
2651             PyErr_Format(PyExc_TypeError,
2652                         "__bytes__ returned non-bytes (type %.200s)",
2653                         Py_TYPE(bytes)->tp_name);
2654             Py_DECREF(bytes);
2655             return NULL;
2656         }
2657     }
2658     else if (PyErr_Occurred())
2659         return NULL;
2660     else if (PyUnicode_Check(x)) {
2661         PyErr_SetString(PyExc_TypeError,
2662                         "string argument without an encoding");
2663         return NULL;
2664     }
2665     /* Is it an integer? */
2666     else if (_PyIndex_Check(x)) {
2667         size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2668         if (size == -1 && PyErr_Occurred()) {
2669             if (!PyErr_ExceptionMatches(PyExc_TypeError))
2670                 return NULL;
2671             PyErr_Clear();  /* fall through */
2672             bytes = PyBytes_FromObject(x);
2673         }
2674         else {
2675             if (size < 0) {
2676                 PyErr_SetString(PyExc_ValueError, "negative count");
2677                 return NULL;
2678             }
2679             bytes = _PyBytes_FromSize(size, 1);
2680         }
2681     }
2682     else {
2683         bytes = PyBytes_FromObject(x);
2684     }
2685 
2686     if (bytes != NULL && type != &PyBytes_Type) {
2687         Py_SETREF(bytes, bytes_subtype_new(type, bytes));
2688     }
2689 
2690     return bytes;
2691 }
2692 
2693 static PyObject*
_PyBytes_FromBuffer(PyObject * x)2694 _PyBytes_FromBuffer(PyObject *x)
2695 {
2696     PyObject *new;
2697     Py_buffer view;
2698 
2699     if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2700         return NULL;
2701 
2702     new = PyBytes_FromStringAndSize(NULL, view.len);
2703     if (!new)
2704         goto fail;
2705     if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2706                 &view, view.len, 'C') < 0)
2707         goto fail;
2708     PyBuffer_Release(&view);
2709     return new;
2710 
2711 fail:
2712     Py_XDECREF(new);
2713     PyBuffer_Release(&view);
2714     return NULL;
2715 }
2716 
2717 static PyObject*
_PyBytes_FromList(PyObject * x)2718 _PyBytes_FromList(PyObject *x)
2719 {
2720     Py_ssize_t i, size = PyList_GET_SIZE(x);
2721     Py_ssize_t value;
2722     char *str;
2723     PyObject *item;
2724     _PyBytesWriter writer;
2725 
2726     _PyBytesWriter_Init(&writer);
2727     str = _PyBytesWriter_Alloc(&writer, size);
2728     if (str == NULL)
2729         return NULL;
2730     writer.overallocate = 1;
2731     size = writer.allocated;
2732 
2733     for (i = 0; i < PyList_GET_SIZE(x); i++) {
2734         item = PyList_GET_ITEM(x, i);
2735         Py_INCREF(item);
2736         value = PyNumber_AsSsize_t(item, NULL);
2737         Py_DECREF(item);
2738         if (value == -1 && PyErr_Occurred())
2739             goto error;
2740 
2741         if (value < 0 || value >= 256) {
2742             PyErr_SetString(PyExc_ValueError,
2743                             "bytes must be in range(0, 256)");
2744             goto error;
2745         }
2746 
2747         if (i >= size) {
2748             str = _PyBytesWriter_Resize(&writer, str, size+1);
2749             if (str == NULL)
2750                 return NULL;
2751             size = writer.allocated;
2752         }
2753         *str++ = (char) value;
2754     }
2755     return _PyBytesWriter_Finish(&writer, str);
2756 
2757   error:
2758     _PyBytesWriter_Dealloc(&writer);
2759     return NULL;
2760 }
2761 
2762 static PyObject*
_PyBytes_FromTuple(PyObject * x)2763 _PyBytes_FromTuple(PyObject *x)
2764 {
2765     PyObject *bytes;
2766     Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2767     Py_ssize_t value;
2768     char *str;
2769     PyObject *item;
2770 
2771     bytes = PyBytes_FromStringAndSize(NULL, size);
2772     if (bytes == NULL)
2773         return NULL;
2774     str = ((PyBytesObject *)bytes)->ob_sval;
2775 
2776     for (i = 0; i < size; i++) {
2777         item = PyTuple_GET_ITEM(x, i);
2778         value = PyNumber_AsSsize_t(item, NULL);
2779         if (value == -1 && PyErr_Occurred())
2780             goto error;
2781 
2782         if (value < 0 || value >= 256) {
2783             PyErr_SetString(PyExc_ValueError,
2784                             "bytes must be in range(0, 256)");
2785             goto error;
2786         }
2787         *str++ = (char) value;
2788     }
2789     return bytes;
2790 
2791   error:
2792     Py_DECREF(bytes);
2793     return NULL;
2794 }
2795 
2796 static PyObject *
_PyBytes_FromIterator(PyObject * it,PyObject * x)2797 _PyBytes_FromIterator(PyObject *it, PyObject *x)
2798 {
2799     char *str;
2800     Py_ssize_t i, size;
2801     _PyBytesWriter writer;
2802 
2803     /* For iterator version, create a bytes object and resize as needed */
2804     size = PyObject_LengthHint(x, 64);
2805     if (size == -1 && PyErr_Occurred())
2806         return NULL;
2807 
2808     _PyBytesWriter_Init(&writer);
2809     str = _PyBytesWriter_Alloc(&writer, size);
2810     if (str == NULL)
2811         return NULL;
2812     writer.overallocate = 1;
2813     size = writer.allocated;
2814 
2815     /* Run the iterator to exhaustion */
2816     for (i = 0; ; i++) {
2817         PyObject *item;
2818         Py_ssize_t value;
2819 
2820         /* Get the next item */
2821         item = PyIter_Next(it);
2822         if (item == NULL) {
2823             if (PyErr_Occurred())
2824                 goto error;
2825             break;
2826         }
2827 
2828         /* Interpret it as an int (__index__) */
2829         value = PyNumber_AsSsize_t(item, NULL);
2830         Py_DECREF(item);
2831         if (value == -1 && PyErr_Occurred())
2832             goto error;
2833 
2834         /* Range check */
2835         if (value < 0 || value >= 256) {
2836             PyErr_SetString(PyExc_ValueError,
2837                             "bytes must be in range(0, 256)");
2838             goto error;
2839         }
2840 
2841         /* Append the byte */
2842         if (i >= size) {
2843             str = _PyBytesWriter_Resize(&writer, str, size+1);
2844             if (str == NULL)
2845                 return NULL;
2846             size = writer.allocated;
2847         }
2848         *str++ = (char) value;
2849     }
2850 
2851     return _PyBytesWriter_Finish(&writer, str);
2852 
2853   error:
2854     _PyBytesWriter_Dealloc(&writer);
2855     return NULL;
2856 }
2857 
2858 PyObject *
PyBytes_FromObject(PyObject * x)2859 PyBytes_FromObject(PyObject *x)
2860 {
2861     PyObject *it, *result;
2862 
2863     if (x == NULL) {
2864         PyErr_BadInternalCall();
2865         return NULL;
2866     }
2867 
2868     if (PyBytes_CheckExact(x)) {
2869         Py_INCREF(x);
2870         return x;
2871     }
2872 
2873     /* Use the modern buffer interface */
2874     if (PyObject_CheckBuffer(x))
2875         return _PyBytes_FromBuffer(x);
2876 
2877     if (PyList_CheckExact(x))
2878         return _PyBytes_FromList(x);
2879 
2880     if (PyTuple_CheckExact(x))
2881         return _PyBytes_FromTuple(x);
2882 
2883     if (!PyUnicode_Check(x)) {
2884         it = PyObject_GetIter(x);
2885         if (it != NULL) {
2886             result = _PyBytes_FromIterator(it, x);
2887             Py_DECREF(it);
2888             return result;
2889         }
2890         if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2891             return NULL;
2892         }
2893     }
2894 
2895     PyErr_Format(PyExc_TypeError,
2896                  "cannot convert '%.200s' object to bytes",
2897                  Py_TYPE(x)->tp_name);
2898     return NULL;
2899 }
2900 
2901 /* This allocator is needed for subclasses don't want to use __new__.
2902  * See https://github.com/python/cpython/issues/91020#issuecomment-1096793239
2903  *
2904  * This allocator will be removed when ob_shash is removed.
2905  */
2906 static PyObject *
bytes_alloc(PyTypeObject * self,Py_ssize_t nitems)2907 bytes_alloc(PyTypeObject *self, Py_ssize_t nitems)
2908 {
2909     PyBytesObject *obj = (PyBytesObject*)PyType_GenericAlloc(self, nitems);
2910     if (obj == NULL) {
2911         return NULL;
2912     }
2913 _Py_COMP_DIAG_PUSH
2914 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
2915     obj->ob_shash = -1;
2916 _Py_COMP_DIAG_POP
2917     return (PyObject*)obj;
2918 }
2919 
2920 static PyObject *
bytes_subtype_new(PyTypeObject * type,PyObject * tmp)2921 bytes_subtype_new(PyTypeObject *type, PyObject *tmp)
2922 {
2923     PyObject *pnew;
2924     Py_ssize_t n;
2925 
2926     assert(PyType_IsSubtype(type, &PyBytes_Type));
2927     assert(PyBytes_Check(tmp));
2928     n = PyBytes_GET_SIZE(tmp);
2929     pnew = type->tp_alloc(type, n);
2930     if (pnew != NULL) {
2931         memcpy(PyBytes_AS_STRING(pnew),
2932                   PyBytes_AS_STRING(tmp), n+1);
2933 _Py_COMP_DIAG_PUSH
2934 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
2935         ((PyBytesObject *)pnew)->ob_shash =
2936             ((PyBytesObject *)tmp)->ob_shash;
2937 _Py_COMP_DIAG_POP
2938     }
2939     return pnew;
2940 }
2941 
2942 PyDoc_STRVAR(bytes_doc,
2943 "bytes(iterable_of_ints) -> bytes\n\
2944 bytes(string, encoding[, errors]) -> bytes\n\
2945 bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
2946 bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2947 bytes() -> empty bytes object\n\
2948 \n\
2949 Construct an immutable array of bytes from:\n\
2950   - an iterable yielding integers in range(256)\n\
2951   - a text string encoded using the specified encoding\n\
2952   - any object implementing the buffer API.\n\
2953   - an integer");
2954 
2955 static PyObject *bytes_iter(PyObject *seq);
2956 
2957 PyTypeObject PyBytes_Type = {
2958     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2959     "bytes",
2960     PyBytesObject_SIZE,
2961     sizeof(char),
2962     0,                                          /* tp_dealloc */
2963     0,                                          /* tp_vectorcall_offset */
2964     0,                                          /* tp_getattr */
2965     0,                                          /* tp_setattr */
2966     0,                                          /* tp_as_async */
2967     (reprfunc)bytes_repr,                       /* tp_repr */
2968     &bytes_as_number,                           /* tp_as_number */
2969     &bytes_as_sequence,                         /* tp_as_sequence */
2970     &bytes_as_mapping,                          /* tp_as_mapping */
2971     (hashfunc)bytes_hash,                       /* tp_hash */
2972     0,                                          /* tp_call */
2973     bytes_str,                                  /* tp_str */
2974     PyObject_GenericGetAttr,                    /* tp_getattro */
2975     0,                                          /* tp_setattro */
2976     &bytes_as_buffer,                           /* tp_as_buffer */
2977     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2978         Py_TPFLAGS_BYTES_SUBCLASS |
2979         _Py_TPFLAGS_MATCH_SELF,               /* tp_flags */
2980     bytes_doc,                                  /* tp_doc */
2981     0,                                          /* tp_traverse */
2982     0,                                          /* tp_clear */
2983     (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
2984     0,                                          /* tp_weaklistoffset */
2985     bytes_iter,                                 /* tp_iter */
2986     0,                                          /* tp_iternext */
2987     bytes_methods,                              /* tp_methods */
2988     0,                                          /* tp_members */
2989     0,                                          /* tp_getset */
2990     0,                                          /* tp_base */
2991     0,                                          /* tp_dict */
2992     0,                                          /* tp_descr_get */
2993     0,                                          /* tp_descr_set */
2994     0,                                          /* tp_dictoffset */
2995     0,                                          /* tp_init */
2996     bytes_alloc,                                /* tp_alloc */
2997     bytes_new,                                  /* tp_new */
2998     PyObject_Del,                               /* tp_free */
2999 };
3000 
3001 void
PyBytes_Concat(PyObject ** pv,PyObject * w)3002 PyBytes_Concat(PyObject **pv, PyObject *w)
3003 {
3004     assert(pv != NULL);
3005     if (*pv == NULL)
3006         return;
3007     if (w == NULL) {
3008         Py_CLEAR(*pv);
3009         return;
3010     }
3011 
3012     if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3013         /* Only one reference, so we can resize in place */
3014         Py_ssize_t oldsize;
3015         Py_buffer wb;
3016 
3017         if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
3018             PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3019                          Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3020             Py_CLEAR(*pv);
3021             return;
3022         }
3023 
3024         oldsize = PyBytes_GET_SIZE(*pv);
3025         if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3026             PyErr_NoMemory();
3027             goto error;
3028         }
3029         if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3030             goto error;
3031 
3032         memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3033         PyBuffer_Release(&wb);
3034         return;
3035 
3036       error:
3037         PyBuffer_Release(&wb);
3038         Py_CLEAR(*pv);
3039         return;
3040     }
3041 
3042     else {
3043         /* Multiple references, need to create new object */
3044         PyObject *v;
3045         v = bytes_concat(*pv, w);
3046         Py_SETREF(*pv, v);
3047     }
3048 }
3049 
3050 void
PyBytes_ConcatAndDel(PyObject ** pv,PyObject * w)3051 PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
3052 {
3053     PyBytes_Concat(pv, w);
3054     Py_XDECREF(w);
3055 }
3056 
3057 
3058 /* The following function breaks the notion that bytes are immutable:
3059    it changes the size of a bytes object.  We get away with this only if there
3060    is only one module referencing the object.  You can also think of it
3061    as creating a new bytes object and destroying the old one, only
3062    more efficiently.  In any case, don't use this if the bytes object may
3063    already be known to some other part of the code...
3064    Note that if there's not enough memory to resize the bytes object, the
3065    original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
3066    memory" exception is set, and -1 is returned.  Else (on success) 0 is
3067    returned, and the value in *pv may or may not be the same as on input.
3068    As always, an extra byte is allocated for a trailing \0 byte (newsize
3069    does *not* include that), and a trailing \0 byte is stored.
3070 */
3071 
3072 int
_PyBytes_Resize(PyObject ** pv,Py_ssize_t newsize)3073 _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3074 {
3075     PyObject *v;
3076     PyBytesObject *sv;
3077     v = *pv;
3078     if (!PyBytes_Check(v) || newsize < 0) {
3079         goto error;
3080     }
3081     if (Py_SIZE(v) == newsize) {
3082         /* return early if newsize equals to v->ob_size */
3083         return 0;
3084     }
3085     if (Py_SIZE(v) == 0) {
3086         if (newsize == 0) {
3087             return 0;
3088         }
3089         *pv = _PyBytes_FromSize(newsize, 0);
3090         Py_DECREF(v);
3091         return (*pv == NULL) ? -1 : 0;
3092     }
3093     if (Py_REFCNT(v) != 1) {
3094         goto error;
3095     }
3096     if (newsize == 0) {
3097         *pv = bytes_new_empty();
3098         Py_DECREF(v);
3099         return 0;
3100     }
3101     /* XXX UNREF/NEWREF interface should be more symmetrical */
3102 #ifdef Py_REF_DEBUG
3103     _Py_RefTotal--;
3104 #endif
3105 #ifdef Py_TRACE_REFS
3106     _Py_ForgetReference(v);
3107 #endif
3108     *pv = (PyObject *)
3109         PyObject_Realloc(v, PyBytesObject_SIZE + newsize);
3110     if (*pv == NULL) {
3111         PyObject_Free(v);
3112         PyErr_NoMemory();
3113         return -1;
3114     }
3115     _Py_NewReference(*pv);
3116     sv = (PyBytesObject *) *pv;
3117     Py_SET_SIZE(sv, newsize);
3118     sv->ob_sval[newsize] = '\0';
3119 _Py_COMP_DIAG_PUSH
3120 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
3121     sv->ob_shash = -1;          /* invalidate cached hash value */
3122 _Py_COMP_DIAG_POP
3123     return 0;
3124 error:
3125     *pv = 0;
3126     Py_DECREF(v);
3127     PyErr_BadInternalCall();
3128     return -1;
3129 }
3130 
3131 
3132 PyStatus
_PyBytes_InitTypes(PyInterpreterState * interp)3133 _PyBytes_InitTypes(PyInterpreterState *interp)
3134 {
3135     if (!_Py_IsMainInterpreter(interp)) {
3136         return _PyStatus_OK();
3137     }
3138 
3139     if (PyType_Ready(&PyBytes_Type) < 0) {
3140         return _PyStatus_ERR("Can't initialize bytes type");
3141     }
3142 
3143     if (PyType_Ready(&PyBytesIter_Type) < 0) {
3144         return _PyStatus_ERR("Can't initialize bytes iterator type");
3145     }
3146 
3147     return _PyStatus_OK();
3148 }
3149 
3150 
3151 /*********************** Bytes Iterator ****************************/
3152 
3153 typedef struct {
3154     PyObject_HEAD
3155     Py_ssize_t it_index;
3156     PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3157 } striterobject;
3158 
3159 static void
striter_dealloc(striterobject * it)3160 striter_dealloc(striterobject *it)
3161 {
3162     _PyObject_GC_UNTRACK(it);
3163     Py_XDECREF(it->it_seq);
3164     PyObject_GC_Del(it);
3165 }
3166 
3167 static int
striter_traverse(striterobject * it,visitproc visit,void * arg)3168 striter_traverse(striterobject *it, visitproc visit, void *arg)
3169 {
3170     Py_VISIT(it->it_seq);
3171     return 0;
3172 }
3173 
3174 static PyObject *
striter_next(striterobject * it)3175 striter_next(striterobject *it)
3176 {
3177     PyBytesObject *seq;
3178 
3179     assert(it != NULL);
3180     seq = it->it_seq;
3181     if (seq == NULL)
3182         return NULL;
3183     assert(PyBytes_Check(seq));
3184 
3185     if (it->it_index < PyBytes_GET_SIZE(seq)) {
3186         return _PyLong_FromUnsignedChar(
3187             (unsigned char)seq->ob_sval[it->it_index++]);
3188     }
3189 
3190     it->it_seq = NULL;
3191     Py_DECREF(seq);
3192     return NULL;
3193 }
3194 
3195 static PyObject *
striter_len(striterobject * it,PyObject * Py_UNUSED (ignored))3196 striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
3197 {
3198     Py_ssize_t len = 0;
3199     if (it->it_seq)
3200         len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3201     return PyLong_FromSsize_t(len);
3202 }
3203 
3204 PyDoc_STRVAR(length_hint_doc,
3205              "Private method returning an estimate of len(list(it)).");
3206 
3207 static PyObject *
striter_reduce(striterobject * it,PyObject * Py_UNUSED (ignored))3208 striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
3209 {
3210     PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));
3211 
3212     /* _PyEval_GetBuiltin can invoke arbitrary code,
3213      * call must be before access of iterator pointers.
3214      * see issue #101765 */
3215 
3216     if (it->it_seq != NULL) {
3217         return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
3218     } else {
3219         return Py_BuildValue("N(())", iter);
3220     }
3221 }
3222 
3223 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3224 
3225 static PyObject *
striter_setstate(striterobject * it,PyObject * state)3226 striter_setstate(striterobject *it, PyObject *state)
3227 {
3228     Py_ssize_t index = PyLong_AsSsize_t(state);
3229     if (index == -1 && PyErr_Occurred())
3230         return NULL;
3231     if (it->it_seq != NULL) {
3232         if (index < 0)
3233             index = 0;
3234         else if (index > PyBytes_GET_SIZE(it->it_seq))
3235             index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3236         it->it_index = index;
3237     }
3238     Py_RETURN_NONE;
3239 }
3240 
3241 PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3242 
3243 static PyMethodDef striter_methods[] = {
3244     {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3245      length_hint_doc},
3246     {"__reduce__",      (PyCFunction)striter_reduce, METH_NOARGS,
3247      reduce_doc},
3248     {"__setstate__",    (PyCFunction)striter_setstate, METH_O,
3249      setstate_doc},
3250     {NULL,              NULL}           /* sentinel */
3251 };
3252 
3253 PyTypeObject PyBytesIter_Type = {
3254     PyVarObject_HEAD_INIT(&PyType_Type, 0)
3255     "bytes_iterator",                           /* tp_name */
3256     sizeof(striterobject),                      /* tp_basicsize */
3257     0,                                          /* tp_itemsize */
3258     /* methods */
3259     (destructor)striter_dealloc,                /* tp_dealloc */
3260     0,                                          /* tp_vectorcall_offset */
3261     0,                                          /* tp_getattr */
3262     0,                                          /* tp_setattr */
3263     0,                                          /* tp_as_async */
3264     0,                                          /* tp_repr */
3265     0,                                          /* tp_as_number */
3266     0,                                          /* tp_as_sequence */
3267     0,                                          /* tp_as_mapping */
3268     0,                                          /* tp_hash */
3269     0,                                          /* tp_call */
3270     0,                                          /* tp_str */
3271     PyObject_GenericGetAttr,                    /* tp_getattro */
3272     0,                                          /* tp_setattro */
3273     0,                                          /* tp_as_buffer */
3274     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3275     0,                                          /* tp_doc */
3276     (traverseproc)striter_traverse,     /* tp_traverse */
3277     0,                                          /* tp_clear */
3278     0,                                          /* tp_richcompare */
3279     0,                                          /* tp_weaklistoffset */
3280     PyObject_SelfIter,                          /* tp_iter */
3281     (iternextfunc)striter_next,                 /* tp_iternext */
3282     striter_methods,                            /* tp_methods */
3283     0,
3284 };
3285 
3286 static PyObject *
bytes_iter(PyObject * seq)3287 bytes_iter(PyObject *seq)
3288 {
3289     striterobject *it;
3290 
3291     if (!PyBytes_Check(seq)) {
3292         PyErr_BadInternalCall();
3293         return NULL;
3294     }
3295     it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3296     if (it == NULL)
3297         return NULL;
3298     it->it_index = 0;
3299     Py_INCREF(seq);
3300     it->it_seq = (PyBytesObject *)seq;
3301     _PyObject_GC_TRACK(it);
3302     return (PyObject *)it;
3303 }
3304 
3305 
3306 /* _PyBytesWriter API */
3307 
/* Divisor used by _PyBytesWriter_Resize() when overallocating: the
   requested size is increased by size / OVERALLOCATE_FACTOR. */
#ifndef MS_WINDOWS
   /* On Linux, overallocate by 25% is the best factor
      (this branch is used by every non-Windows build) */
#  define OVERALLOCATE_FACTOR 4
#else
   /* On Windows, overallocate by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#endif
3315 
3316 void
_PyBytesWriter_Init(_PyBytesWriter * writer)3317 _PyBytesWriter_Init(_PyBytesWriter *writer)
3318 {
3319     /* Set all attributes before small_buffer to 0 */
3320     memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
3321 #ifndef NDEBUG
3322     memset(writer->small_buffer, PYMEM_CLEANBYTE,
3323            sizeof(writer->small_buffer));
3324 #endif
3325 }
3326 
3327 void
_PyBytesWriter_Dealloc(_PyBytesWriter * writer)3328 _PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3329 {
3330     Py_CLEAR(writer->buffer);
3331 }
3332 
3333 Py_LOCAL_INLINE(char*)
_PyBytesWriter_AsString(_PyBytesWriter * writer)3334 _PyBytesWriter_AsString(_PyBytesWriter *writer)
3335 {
3336     if (writer->use_small_buffer) {
3337         assert(writer->buffer == NULL);
3338         return writer->small_buffer;
3339     }
3340     else if (writer->use_bytearray) {
3341         assert(writer->buffer != NULL);
3342         return PyByteArray_AS_STRING(writer->buffer);
3343     }
3344     else {
3345         assert(writer->buffer != NULL);
3346         return PyBytes_AS_STRING(writer->buffer);
3347     }
3348 }
3349 
3350 Py_LOCAL_INLINE(Py_ssize_t)
_PyBytesWriter_GetSize(_PyBytesWriter * writer,char * str)3351 _PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
3352 {
3353     const char *start = _PyBytesWriter_AsString(writer);
3354     assert(str != NULL);
3355     assert(str >= start);
3356     assert(str - start <= writer->allocated);
3357     return str - start;
3358 }
3359 
3360 #ifndef NDEBUG
3361 Py_LOCAL_INLINE(int)
_PyBytesWriter_CheckConsistency(_PyBytesWriter * writer,char * str)3362 _PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3363 {
3364     const char *start, *end;
3365 
3366     if (writer->use_small_buffer) {
3367         assert(writer->buffer == NULL);
3368     }
3369     else {
3370         assert(writer->buffer != NULL);
3371         if (writer->use_bytearray)
3372             assert(PyByteArray_CheckExact(writer->buffer));
3373         else
3374             assert(PyBytes_CheckExact(writer->buffer));
3375         assert(Py_REFCNT(writer->buffer) == 1);
3376     }
3377 
3378     if (writer->use_bytearray) {
3379         /* bytearray has its own overallocation algorithm,
3380            writer overallocation must be disabled */
3381         assert(!writer->overallocate);
3382     }
3383 
3384     assert(0 <= writer->allocated);
3385     assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
3386     /* the last byte must always be null */
3387     start = _PyBytesWriter_AsString(writer);
3388     assert(start[writer->allocated] == 0);
3389 
3390     end = start + writer->allocated;
3391     assert(str != NULL);
3392     assert(start <= str && str <= end);
3393     return 1;
3394 }
3395 #endif
3396 
3397 void*
_PyBytesWriter_Resize(_PyBytesWriter * writer,void * str,Py_ssize_t size)3398 _PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3399 {
3400     Py_ssize_t allocated, pos;
3401 
3402     assert(_PyBytesWriter_CheckConsistency(writer, str));
3403     assert(writer->allocated < size);
3404 
3405     allocated = size;
3406     if (writer->overallocate
3407         && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3408         /* overallocate to limit the number of realloc() */
3409         allocated += allocated / OVERALLOCATE_FACTOR;
3410     }
3411 
3412     pos = _PyBytesWriter_GetSize(writer, str);
3413     if (!writer->use_small_buffer) {
3414         if (writer->use_bytearray) {
3415             if (PyByteArray_Resize(writer->buffer, allocated))
3416                 goto error;
3417             /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3418                but we cannot use ob_alloc because bytes may need to be moved
3419                to use the whole buffer. bytearray uses an internal optimization
3420                to avoid moving or copying bytes when bytes are removed at the
3421                beginning (ex: del bytearray[:1]). */
3422         }
3423         else {
3424             if (_PyBytes_Resize(&writer->buffer, allocated))
3425                 goto error;
3426         }
3427     }
3428     else {
3429         /* convert from stack buffer to bytes object buffer */
3430         assert(writer->buffer == NULL);
3431 
3432         if (writer->use_bytearray)
3433             writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3434         else
3435             writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3436         if (writer->buffer == NULL)
3437             goto error;
3438 
3439         if (pos != 0) {
3440             char *dest;
3441             if (writer->use_bytearray)
3442                 dest = PyByteArray_AS_STRING(writer->buffer);
3443             else
3444                 dest = PyBytes_AS_STRING(writer->buffer);
3445             memcpy(dest,
3446                       writer->small_buffer,
3447                       pos);
3448         }
3449 
3450         writer->use_small_buffer = 0;
3451 #ifndef NDEBUG
3452         memset(writer->small_buffer, PYMEM_CLEANBYTE,
3453                sizeof(writer->small_buffer));
3454 #endif
3455     }
3456     writer->allocated = allocated;
3457 
3458     str = _PyBytesWriter_AsString(writer) + pos;
3459     assert(_PyBytesWriter_CheckConsistency(writer, str));
3460     return str;
3461 
3462 error:
3463     _PyBytesWriter_Dealloc(writer);
3464     return NULL;
3465 }
3466 
3467 void*
_PyBytesWriter_Prepare(_PyBytesWriter * writer,void * str,Py_ssize_t size)3468 _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3469 {
3470     Py_ssize_t new_min_size;
3471 
3472     assert(_PyBytesWriter_CheckConsistency(writer, str));
3473     assert(size >= 0);
3474 
3475     if (size == 0) {
3476         /* nothing to do */
3477         return str;
3478     }
3479 
3480     if (writer->min_size > PY_SSIZE_T_MAX - size) {
3481         PyErr_NoMemory();
3482         _PyBytesWriter_Dealloc(writer);
3483         return NULL;
3484     }
3485     new_min_size = writer->min_size + size;
3486 
3487     if (new_min_size > writer->allocated)
3488         str = _PyBytesWriter_Resize(writer, str, new_min_size);
3489 
3490     writer->min_size = new_min_size;
3491     return str;
3492 }
3493 
3494 /* Allocate the buffer to write size bytes.
3495    Return the pointer to the beginning of buffer data.
3496    Raise an exception and return NULL on error. */
3497 void*
_PyBytesWriter_Alloc(_PyBytesWriter * writer,Py_ssize_t size)3498 _PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3499 {
3500     /* ensure that _PyBytesWriter_Alloc() is only called once */
3501     assert(writer->min_size == 0 && writer->buffer == NULL);
3502     assert(size >= 0);
3503 
3504     writer->use_small_buffer = 1;
3505 #ifndef NDEBUG
3506     writer->allocated = sizeof(writer->small_buffer) - 1;
3507     /* In debug mode, don't use the full small buffer because it is less
3508        efficient than bytes and bytearray objects to detect buffer underflow
3509        and buffer overflow. Use 10 bytes of the small buffer to test also
3510        code using the smaller buffer in debug mode.
3511 
3512        Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3513        in debug mode to also be able to detect stack overflow when running
3514        tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3515        if _Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3516        stack overflow. */
3517     writer->allocated = Py_MIN(writer->allocated, 10);
3518     /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3519        to detect buffer overflow */
3520     writer->small_buffer[writer->allocated] = 0;
3521 #else
3522     writer->allocated = sizeof(writer->small_buffer);
3523 #endif
3524     return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
3525 }
3526 
3527 PyObject *
_PyBytesWriter_Finish(_PyBytesWriter * writer,void * str)3528 _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
3529 {
3530     Py_ssize_t size;
3531     PyObject *result;
3532 
3533     assert(_PyBytesWriter_CheckConsistency(writer, str));
3534 
3535     size = _PyBytesWriter_GetSize(writer, str);
3536     if (size == 0 && !writer->use_bytearray) {
3537         Py_CLEAR(writer->buffer);
3538         /* Get the empty byte string singleton */
3539         result = PyBytes_FromStringAndSize(NULL, 0);
3540     }
3541     else if (writer->use_small_buffer) {
3542         if (writer->use_bytearray) {
3543             result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3544         }
3545         else {
3546             result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3547         }
3548     }
3549     else {
3550         result = writer->buffer;
3551         writer->buffer = NULL;
3552 
3553         if (size != writer->allocated) {
3554             if (writer->use_bytearray) {
3555                 if (PyByteArray_Resize(result, size)) {
3556                     Py_DECREF(result);
3557                     return NULL;
3558                 }
3559             }
3560             else {
3561                 if (_PyBytes_Resize(&result, size)) {
3562                     assert(result == NULL);
3563                     return NULL;
3564                 }
3565             }
3566         }
3567     }
3568     return result;
3569 }
3570 
3571 void*
_PyBytesWriter_WriteBytes(_PyBytesWriter * writer,void * ptr,const void * bytes,Py_ssize_t size)3572 _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
3573                           const void *bytes, Py_ssize_t size)
3574 {
3575     char *str = (char *)ptr;
3576 
3577     str = _PyBytesWriter_Prepare(writer, str, size);
3578     if (str == NULL)
3579         return NULL;
3580 
3581     memcpy(str, bytes, size);
3582     str += size;
3583 
3584     return str;
3585 }
3586 
3587 
3588 void
_PyBytes_Repeat(char * dest,Py_ssize_t len_dest,const char * src,Py_ssize_t len_src)3589 _PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
3590     const char* src, Py_ssize_t len_src)
3591 {
3592     if (len_dest == 0) {
3593         return;
3594     }
3595     if (len_src == 1) {
3596         memset(dest, src[0], len_dest);
3597     }
3598     else {
3599         if (src != dest) {
3600             memcpy(dest, src, len_src);
3601         }
3602         Py_ssize_t copied = len_src;
3603         while (copied < len_dest) {
3604             Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied);
3605             memcpy(dest + copied, dest, bytes_to_copy);
3606             copied += bytes_to_copy;
3607         }
3608     }
3609 }
3610 
3611