• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* bytes object implementation */
2 
3 #define PY_SSIZE_T_CLEAN
4 
5 #include "Python.h"
6 #include "pycore_abstract.h"      // _PyIndex_Check()
7 #include "pycore_bytes_methods.h"
8 #include "pycore_object.h"
9 #include "pycore_pymem.h"         // PYMEM_CLEANBYTE
10 
11 #include "pystrhex.h"
12 #include <stddef.h>
13 
14 /*[clinic input]
15 class bytes "PyBytesObject *" "&PyBytes_Type"
16 [clinic start generated code]*/
17 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
18 
19 #include "clinic/bytesobject.c.h"
20 
21 static PyBytesObject *characters[UCHAR_MAX + 1];
22 static PyBytesObject *nullstring;
23 
24 _Py_IDENTIFIER(__bytes__);
25 
26 /* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
27    for a string of length n should request PyBytesObject_SIZE + n bytes.
28 
29    Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
30    3 bytes per string allocation on a typical system.
31 */
32 #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
33 
34 /* Forward declaration */
35 Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
36                                                    char *str);
37 
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string; the size of the new object is the length of that string, not
   counting the null terminating character.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
60 static PyObject *
_PyBytes_FromSize(Py_ssize_t size,int use_calloc)61 _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
62 {
63     PyBytesObject *op;
64     assert(size >= 0);
65 
66     if (size == 0 && (op = nullstring) != NULL) {
67         Py_INCREF(op);
68         return (PyObject *)op;
69     }
70 
71     if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
72         PyErr_SetString(PyExc_OverflowError,
73                         "byte string is too large");
74         return NULL;
75     }
76 
77     /* Inline PyObject_NewVar */
78     if (use_calloc)
79         op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
80     else
81         op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
82     if (op == NULL)
83         return PyErr_NoMemory();
84     (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
85     op->ob_shash = -1;
86     if (!use_calloc)
87         op->ob_sval[size] = '\0';
88     /* empty byte string singleton */
89     if (size == 0) {
90         nullstring = op;
91         Py_INCREF(op);
92     }
93     return (PyObject *) op;
94 }
95 
96 PyObject *
PyBytes_FromStringAndSize(const char * str,Py_ssize_t size)97 PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
98 {
99     PyBytesObject *op;
100     if (size < 0) {
101         PyErr_SetString(PyExc_SystemError,
102             "Negative size passed to PyBytes_FromStringAndSize");
103         return NULL;
104     }
105     if (size == 1 && str != NULL &&
106         (op = characters[*str & UCHAR_MAX]) != NULL)
107     {
108         Py_INCREF(op);
109         return (PyObject *)op;
110     }
111 
112     op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
113     if (op == NULL)
114         return NULL;
115     if (str == NULL)
116         return (PyObject *) op;
117 
118     memcpy(op->ob_sval, str, size);
119     /* share short strings */
120     if (size == 1) {
121         characters[*str & UCHAR_MAX] = op;
122         Py_INCREF(op);
123     }
124     return (PyObject *) op;
125 }
126 
127 PyObject *
PyBytes_FromString(const char * str)128 PyBytes_FromString(const char *str)
129 {
130     size_t size;
131     PyBytesObject *op;
132 
133     assert(str != NULL);
134     size = strlen(str);
135     if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
136         PyErr_SetString(PyExc_OverflowError,
137             "byte string is too long");
138         return NULL;
139     }
140     if (size == 0 && (op = nullstring) != NULL) {
141         Py_INCREF(op);
142         return (PyObject *)op;
143     }
144     if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
145         Py_INCREF(op);
146         return (PyObject *)op;
147     }
148 
149     /* Inline PyObject_NewVar */
150     op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
151     if (op == NULL)
152         return PyErr_NoMemory();
153     (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
154     op->ob_shash = -1;
155     memcpy(op->ob_sval, str, size+1);
156     /* share short strings */
157     if (size == 0) {
158         nullstring = op;
159         Py_INCREF(op);
160     } else if (size == 1) {
161         characters[*str & UCHAR_MAX] = op;
162         Py_INCREF(op);
163     }
164     return (PyObject *) op;
165 }
166 
167 PyObject *
PyBytes_FromFormatV(const char * format,va_list vargs)168 PyBytes_FromFormatV(const char *format, va_list vargs)
169 {
170     char *s;
171     const char *f;
172     const char *p;
173     Py_ssize_t prec;
174     int longflag;
175     int size_tflag;
176     /* Longest 64-bit formatted numbers:
177        - "18446744073709551615\0" (21 bytes)
178        - "-9223372036854775808\0" (21 bytes)
179        Decimal takes the most space (it isn't enough for octal.)
180 
181        Longest 64-bit pointer representation:
182        "0xffffffffffffffff\0" (19 bytes). */
183     char buffer[21];
184     _PyBytesWriter writer;
185 
186     _PyBytesWriter_Init(&writer);
187 
188     s = _PyBytesWriter_Alloc(&writer, strlen(format));
189     if (s == NULL)
190         return NULL;
191     writer.overallocate = 1;
192 
193 #define WRITE_BYTES(str) \
194     do { \
195         s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
196         if (s == NULL) \
197             goto error; \
198     } while (0)
199 
200     for (f = format; *f; f++) {
201         if (*f != '%') {
202             *s++ = *f;
203             continue;
204         }
205 
206         p = f++;
207 
208         /* ignore the width (ex: 10 in "%10s") */
209         while (Py_ISDIGIT(*f))
210             f++;
211 
212         /* parse the precision (ex: 10 in "%.10s") */
213         prec = 0;
214         if (*f == '.') {
215             f++;
216             for (; Py_ISDIGIT(*f); f++) {
217                 prec = (prec * 10) + (*f - '0');
218             }
219         }
220 
221         while (*f && *f != '%' && !Py_ISALPHA(*f))
222             f++;
223 
224         /* handle the long flag ('l'), but only for %ld and %lu.
225            others can be added when necessary. */
226         longflag = 0;
227         if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
228             longflag = 1;
229             ++f;
230         }
231 
232         /* handle the size_t flag ('z'). */
233         size_tflag = 0;
234         if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
235             size_tflag = 1;
236             ++f;
237         }
238 
239         /* subtract bytes preallocated for the format string
240            (ex: 2 for "%s") */
241         writer.min_size -= (f - p + 1);
242 
243         switch (*f) {
244         case 'c':
245         {
246             int c = va_arg(vargs, int);
247             if (c < 0 || c > 255) {
248                 PyErr_SetString(PyExc_OverflowError,
249                                 "PyBytes_FromFormatV(): %c format "
250                                 "expects an integer in range [0; 255]");
251                 goto error;
252             }
253             writer.min_size++;
254             *s++ = (unsigned char)c;
255             break;
256         }
257 
258         case 'd':
259             if (longflag)
260                 sprintf(buffer, "%ld", va_arg(vargs, long));
261             else if (size_tflag)
262                 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
263                     va_arg(vargs, Py_ssize_t));
264             else
265                 sprintf(buffer, "%d", va_arg(vargs, int));
266             assert(strlen(buffer) < sizeof(buffer));
267             WRITE_BYTES(buffer);
268             break;
269 
270         case 'u':
271             if (longflag)
272                 sprintf(buffer, "%lu",
273                     va_arg(vargs, unsigned long));
274             else if (size_tflag)
275                 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
276                     va_arg(vargs, size_t));
277             else
278                 sprintf(buffer, "%u",
279                     va_arg(vargs, unsigned int));
280             assert(strlen(buffer) < sizeof(buffer));
281             WRITE_BYTES(buffer);
282             break;
283 
284         case 'i':
285             sprintf(buffer, "%i", va_arg(vargs, int));
286             assert(strlen(buffer) < sizeof(buffer));
287             WRITE_BYTES(buffer);
288             break;
289 
290         case 'x':
291             sprintf(buffer, "%x", va_arg(vargs, int));
292             assert(strlen(buffer) < sizeof(buffer));
293             WRITE_BYTES(buffer);
294             break;
295 
296         case 's':
297         {
298             Py_ssize_t i;
299 
300             p = va_arg(vargs, const char*);
301             if (prec <= 0) {
302                 i = strlen(p);
303             }
304             else {
305                 i = 0;
306                 while (i < prec && p[i]) {
307                     i++;
308                 }
309             }
310             s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
311             if (s == NULL)
312                 goto error;
313             break;
314         }
315 
316         case 'p':
317             sprintf(buffer, "%p", va_arg(vargs, void*));
318             assert(strlen(buffer) < sizeof(buffer));
319             /* %p is ill-defined:  ensure leading 0x. */
320             if (buffer[1] == 'X')
321                 buffer[1] = 'x';
322             else if (buffer[1] != 'x') {
323                 memmove(buffer+2, buffer, strlen(buffer)+1);
324                 buffer[0] = '0';
325                 buffer[1] = 'x';
326             }
327             WRITE_BYTES(buffer);
328             break;
329 
330         case '%':
331             writer.min_size++;
332             *s++ = '%';
333             break;
334 
335         default:
336             if (*f == 0) {
337                 /* fix min_size if we reached the end of the format string */
338                 writer.min_size++;
339             }
340 
341             /* invalid format string: copy unformatted string and exit */
342             WRITE_BYTES(p);
343             return _PyBytesWriter_Finish(&writer, s);
344         }
345     }
346 
347 #undef WRITE_BYTES
348 
349     return _PyBytesWriter_Finish(&writer, s);
350 
351  error:
352     _PyBytesWriter_Dealloc(&writer);
353     return NULL;
354 }
355 
356 PyObject *
PyBytes_FromFormat(const char * format,...)357 PyBytes_FromFormat(const char *format, ...)
358 {
359     PyObject* ret;
360     va_list vargs;
361 
362 #ifdef HAVE_STDARG_PROTOTYPES
363     va_start(vargs, format);
364 #else
365     va_start(vargs);
366 #endif
367     ret = PyBytes_FromFormatV(format, vargs);
368     va_end(vargs);
369     return ret;
370 }
371 
372 /* Helpers for formatstring */
373 
374 Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject * args,Py_ssize_t arglen,Py_ssize_t * p_argidx)375 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
376 {
377     Py_ssize_t argidx = *p_argidx;
378     if (argidx < arglen) {
379         (*p_argidx)++;
380         if (arglen < 0)
381             return args;
382         else
383             return PyTuple_GetItem(args, argidx);
384     }
385     PyErr_SetString(PyExc_TypeError,
386                     "not enough arguments for format string");
387     return NULL;
388 }
389 
390 /* Format codes
391  * F_LJUST      '-'
392  * F_SIGN       '+'
393  * F_BLANK      ' '
394  * F_ALT        '#'
395  * F_ZERO       '0'
396  */
397 #define F_LJUST (1<<0)
398 #define F_SIGN  (1<<1)
399 #define F_BLANK (1<<2)
400 #define F_ALT   (1<<3)
401 #define F_ZERO  (1<<4)
402 
403 /* Returns a new reference to a PyBytes object, or NULL on failure. */
404 
405 static char*
formatfloat(PyObject * v,int flags,int prec,int type,PyObject ** p_result,_PyBytesWriter * writer,char * str)406 formatfloat(PyObject *v, int flags, int prec, int type,
407             PyObject **p_result, _PyBytesWriter *writer, char *str)
408 {
409     char *p;
410     PyObject *result;
411     double x;
412     size_t len;
413 
414     x = PyFloat_AsDouble(v);
415     if (x == -1.0 && PyErr_Occurred()) {
416         PyErr_Format(PyExc_TypeError, "float argument required, "
417                      "not %.200s", Py_TYPE(v)->tp_name);
418         return NULL;
419     }
420 
421     if (prec < 0)
422         prec = 6;
423 
424     p = PyOS_double_to_string(x, type, prec,
425                               (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
426 
427     if (p == NULL)
428         return NULL;
429 
430     len = strlen(p);
431     if (writer != NULL) {
432         str = _PyBytesWriter_Prepare(writer, str, len);
433         if (str == NULL)
434             return NULL;
435         memcpy(str, p, len);
436         PyMem_Free(p);
437         str += len;
438         return str;
439     }
440 
441     result = PyBytes_FromStringAndSize(p, len);
442     PyMem_Free(p);
443     *p_result = result;
444     return result != NULL ? str : NULL;
445 }
446 
447 static PyObject *
formatlong(PyObject * v,int flags,int prec,int type)448 formatlong(PyObject *v, int flags, int prec, int type)
449 {
450     PyObject *result, *iobj;
451     if (type == 'i')
452         type = 'd';
453     if (PyLong_Check(v))
454         return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
455     if (PyNumber_Check(v)) {
456         /* make sure number is a type of integer for o, x, and X */
457         if (type == 'o' || type == 'x' || type == 'X')
458             iobj = PyNumber_Index(v);
459         else
460             iobj = PyNumber_Long(v);
461         if (iobj == NULL) {
462             if (!PyErr_ExceptionMatches(PyExc_TypeError))
463                 return NULL;
464         }
465         else if (!PyLong_Check(iobj))
466             Py_CLEAR(iobj);
467         if (iobj != NULL) {
468             result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
469             Py_DECREF(iobj);
470             return result;
471         }
472     }
473     PyErr_Format(PyExc_TypeError,
474         "%%%c format: %s is required, not %.200s", type,
475         (type == 'o' || type == 'x' || type == 'X') ? "an integer"
476                                                     : "a number",
477         Py_TYPE(v)->tp_name);
478     return NULL;
479 }
480 
481 static int
byte_converter(PyObject * arg,char * p)482 byte_converter(PyObject *arg, char *p)
483 {
484     if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
485         *p = PyBytes_AS_STRING(arg)[0];
486         return 1;
487     }
488     else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
489         *p = PyByteArray_AS_STRING(arg)[0];
490         return 1;
491     }
492     else {
493         PyObject *iobj;
494         long ival;
495         int overflow;
496         /* make sure number is a type of integer */
497         if (PyLong_Check(arg)) {
498             ival = PyLong_AsLongAndOverflow(arg, &overflow);
499         }
500         else {
501             iobj = PyNumber_Index(arg);
502             if (iobj == NULL) {
503                 if (!PyErr_ExceptionMatches(PyExc_TypeError))
504                     return 0;
505                 goto onError;
506             }
507             ival = PyLong_AsLongAndOverflow(iobj, &overflow);
508             Py_DECREF(iobj);
509         }
510         if (!overflow && ival == -1 && PyErr_Occurred())
511             goto onError;
512         if (overflow || !(0 <= ival && ival <= 255)) {
513             PyErr_SetString(PyExc_OverflowError,
514                             "%c arg not in range(256)");
515             return 0;
516         }
517         *p = (char)ival;
518         return 1;
519     }
520   onError:
521     PyErr_SetString(PyExc_TypeError,
522         "%c requires an integer in range(256) or a single byte");
523     return 0;
524 }
525 
526 static PyObject *_PyBytes_FromBuffer(PyObject *x);
527 
528 static PyObject *
format_obj(PyObject * v,const char ** pbuf,Py_ssize_t * plen)529 format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
530 {
531     PyObject *func, *result;
532     /* is it a bytes object? */
533     if (PyBytes_Check(v)) {
534         *pbuf = PyBytes_AS_STRING(v);
535         *plen = PyBytes_GET_SIZE(v);
536         Py_INCREF(v);
537         return v;
538     }
539     if (PyByteArray_Check(v)) {
540         *pbuf = PyByteArray_AS_STRING(v);
541         *plen = PyByteArray_GET_SIZE(v);
542         Py_INCREF(v);
543         return v;
544     }
545     /* does it support __bytes__? */
546     func = _PyObject_LookupSpecial(v, &PyId___bytes__);
547     if (func != NULL) {
548         result = _PyObject_CallNoArg(func);
549         Py_DECREF(func);
550         if (result == NULL)
551             return NULL;
552         if (!PyBytes_Check(result)) {
553             PyErr_Format(PyExc_TypeError,
554                          "__bytes__ returned non-bytes (type %.200s)",
555                          Py_TYPE(result)->tp_name);
556             Py_DECREF(result);
557             return NULL;
558         }
559         *pbuf = PyBytes_AS_STRING(result);
560         *plen = PyBytes_GET_SIZE(result);
561         return result;
562     }
563     /* does it support buffer protocol? */
564     if (PyObject_CheckBuffer(v)) {
565         /* maybe we can avoid making a copy of the buffer object here? */
566         result = _PyBytes_FromBuffer(v);
567         if (result == NULL)
568             return NULL;
569         *pbuf = PyBytes_AS_STRING(result);
570         *plen = PyBytes_GET_SIZE(result);
571         return result;
572     }
573     PyErr_Format(PyExc_TypeError,
574                  "%%b requires a bytes-like object, "
575                  "or an object that implements __bytes__, not '%.100s'",
576                  Py_TYPE(v)->tp_name);
577     return NULL;
578 }
579 
580 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
581 
582 PyObject *
_PyBytes_FormatEx(const char * format,Py_ssize_t format_len,PyObject * args,int use_bytearray)583 _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
584                   PyObject *args, int use_bytearray)
585 {
586     const char *fmt;
587     char *res;
588     Py_ssize_t arglen, argidx;
589     Py_ssize_t fmtcnt;
590     int args_owned = 0;
591     PyObject *dict = NULL;
592     _PyBytesWriter writer;
593 
594     if (args == NULL) {
595         PyErr_BadInternalCall();
596         return NULL;
597     }
598     fmt = format;
599     fmtcnt = format_len;
600 
601     _PyBytesWriter_Init(&writer);
602     writer.use_bytearray = use_bytearray;
603 
604     res = _PyBytesWriter_Alloc(&writer, fmtcnt);
605     if (res == NULL)
606         return NULL;
607     if (!use_bytearray)
608         writer.overallocate = 1;
609 
610     if (PyTuple_Check(args)) {
611         arglen = PyTuple_GET_SIZE(args);
612         argidx = 0;
613     }
614     else {
615         arglen = -1;
616         argidx = -2;
617     }
618     if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
619         !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
620         !PyByteArray_Check(args)) {
621             dict = args;
622     }
623 
624     while (--fmtcnt >= 0) {
625         if (*fmt != '%') {
626             Py_ssize_t len;
627             char *pos;
628 
629             pos = (char *)memchr(fmt + 1, '%', fmtcnt);
630             if (pos != NULL)
631                 len = pos - fmt;
632             else
633                 len = fmtcnt + 1;
634             assert(len != 0);
635 
636             memcpy(res, fmt, len);
637             res += len;
638             fmt += len;
639             fmtcnt -= (len - 1);
640         }
641         else {
642             /* Got a format specifier */
643             int flags = 0;
644             Py_ssize_t width = -1;
645             int prec = -1;
646             int c = '\0';
647             int fill;
648             PyObject *v = NULL;
649             PyObject *temp = NULL;
650             const char *pbuf = NULL;
651             int sign;
652             Py_ssize_t len = 0;
653             char onechar; /* For byte_converter() */
654             Py_ssize_t alloc;
655 
656             fmt++;
657             if (*fmt == '%') {
658                 *res++ = '%';
659                 fmt++;
660                 fmtcnt--;
661                 continue;
662             }
663             if (*fmt == '(') {
664                 const char *keystart;
665                 Py_ssize_t keylen;
666                 PyObject *key;
667                 int pcount = 1;
668 
669                 if (dict == NULL) {
670                     PyErr_SetString(PyExc_TypeError,
671                              "format requires a mapping");
672                     goto error;
673                 }
674                 ++fmt;
675                 --fmtcnt;
676                 keystart = fmt;
677                 /* Skip over balanced parentheses */
678                 while (pcount > 0 && --fmtcnt >= 0) {
679                     if (*fmt == ')')
680                         --pcount;
681                     else if (*fmt == '(')
682                         ++pcount;
683                     fmt++;
684                 }
685                 keylen = fmt - keystart - 1;
686                 if (fmtcnt < 0 || pcount > 0) {
687                     PyErr_SetString(PyExc_ValueError,
688                                "incomplete format key");
689                     goto error;
690                 }
691                 key = PyBytes_FromStringAndSize(keystart,
692                                                  keylen);
693                 if (key == NULL)
694                     goto error;
695                 if (args_owned) {
696                     Py_DECREF(args);
697                     args_owned = 0;
698                 }
699                 args = PyObject_GetItem(dict, key);
700                 Py_DECREF(key);
701                 if (args == NULL) {
702                     goto error;
703                 }
704                 args_owned = 1;
705                 arglen = -1;
706                 argidx = -2;
707             }
708 
709             /* Parse flags. Example: "%+i" => flags=F_SIGN. */
710             while (--fmtcnt >= 0) {
711                 switch (c = *fmt++) {
712                 case '-': flags |= F_LJUST; continue;
713                 case '+': flags |= F_SIGN; continue;
714                 case ' ': flags |= F_BLANK; continue;
715                 case '#': flags |= F_ALT; continue;
716                 case '0': flags |= F_ZERO; continue;
717                 }
718                 break;
719             }
720 
721             /* Parse width. Example: "%10s" => width=10 */
722             if (c == '*') {
723                 v = getnextarg(args, arglen, &argidx);
724                 if (v == NULL)
725                     goto error;
726                 if (!PyLong_Check(v)) {
727                     PyErr_SetString(PyExc_TypeError,
728                                     "* wants int");
729                     goto error;
730                 }
731                 width = PyLong_AsSsize_t(v);
732                 if (width == -1 && PyErr_Occurred())
733                     goto error;
734                 if (width < 0) {
735                     flags |= F_LJUST;
736                     width = -width;
737                 }
738                 if (--fmtcnt >= 0)
739                     c = *fmt++;
740             }
741             else if (c >= 0 && isdigit(c)) {
742                 width = c - '0';
743                 while (--fmtcnt >= 0) {
744                     c = Py_CHARMASK(*fmt++);
745                     if (!isdigit(c))
746                         break;
747                     if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
748                         PyErr_SetString(
749                             PyExc_ValueError,
750                             "width too big");
751                         goto error;
752                     }
753                     width = width*10 + (c - '0');
754                 }
755             }
756 
757             /* Parse precision. Example: "%.3f" => prec=3 */
758             if (c == '.') {
759                 prec = 0;
760                 if (--fmtcnt >= 0)
761                     c = *fmt++;
762                 if (c == '*') {
763                     v = getnextarg(args, arglen, &argidx);
764                     if (v == NULL)
765                         goto error;
766                     if (!PyLong_Check(v)) {
767                         PyErr_SetString(
768                             PyExc_TypeError,
769                             "* wants int");
770                         goto error;
771                     }
772                     prec = _PyLong_AsInt(v);
773                     if (prec == -1 && PyErr_Occurred())
774                         goto error;
775                     if (prec < 0)
776                         prec = 0;
777                     if (--fmtcnt >= 0)
778                         c = *fmt++;
779                 }
780                 else if (c >= 0 && isdigit(c)) {
781                     prec = c - '0';
782                     while (--fmtcnt >= 0) {
783                         c = Py_CHARMASK(*fmt++);
784                         if (!isdigit(c))
785                             break;
786                         if (prec > (INT_MAX - ((int)c - '0')) / 10) {
787                             PyErr_SetString(
788                                 PyExc_ValueError,
789                                 "prec too big");
790                             goto error;
791                         }
792                         prec = prec*10 + (c - '0');
793                     }
794                 }
795             } /* prec */
796             if (fmtcnt >= 0) {
797                 if (c == 'h' || c == 'l' || c == 'L') {
798                     if (--fmtcnt >= 0)
799                         c = *fmt++;
800                 }
801             }
802             if (fmtcnt < 0) {
803                 PyErr_SetString(PyExc_ValueError,
804                                 "incomplete format");
805                 goto error;
806             }
807             v = getnextarg(args, arglen, &argidx);
808             if (v == NULL)
809                 goto error;
810 
811             if (fmtcnt == 0) {
812                 /* last write: disable writer overallocation */
813                 writer.overallocate = 0;
814             }
815 
816             sign = 0;
817             fill = ' ';
818             switch (c) {
819             case 'r':
820                 // %r is only for 2/3 code; 3 only code should use %a
821             case 'a':
822                 temp = PyObject_ASCII(v);
823                 if (temp == NULL)
824                     goto error;
825                 assert(PyUnicode_IS_ASCII(temp));
826                 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
827                 len = PyUnicode_GET_LENGTH(temp);
828                 if (prec >= 0 && len > prec)
829                     len = prec;
830                 break;
831 
832             case 's':
833                 // %s is only for 2/3 code; 3 only code should use %b
834             case 'b':
835                 temp = format_obj(v, &pbuf, &len);
836                 if (temp == NULL)
837                     goto error;
838                 if (prec >= 0 && len > prec)
839                     len = prec;
840                 break;
841 
842             case 'i':
843             case 'd':
844             case 'u':
845             case 'o':
846             case 'x':
847             case 'X':
848                 if (PyLong_CheckExact(v)
849                     && width == -1 && prec == -1
850                     && !(flags & (F_SIGN | F_BLANK))
851                     && c != 'X')
852                 {
853                     /* Fast path */
854                     int alternate = flags & F_ALT;
855                     int base;
856 
857                     switch(c)
858                     {
859                         default:
860                             Py_UNREACHABLE();
861                         case 'd':
862                         case 'i':
863                         case 'u':
864                             base = 10;
865                             break;
866                         case 'o':
867                             base = 8;
868                             break;
869                         case 'x':
870                         case 'X':
871                             base = 16;
872                             break;
873                     }
874 
875                     /* Fast path */
876                     writer.min_size -= 2; /* size preallocated for "%d" */
877                     res = _PyLong_FormatBytesWriter(&writer, res,
878                                                     v, base, alternate);
879                     if (res == NULL)
880                         goto error;
881                     continue;
882                 }
883 
884                 temp = formatlong(v, flags, prec, c);
885                 if (!temp)
886                     goto error;
887                 assert(PyUnicode_IS_ASCII(temp));
888                 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
889                 len = PyUnicode_GET_LENGTH(temp);
890                 sign = 1;
891                 if (flags & F_ZERO)
892                     fill = '0';
893                 break;
894 
895             case 'e':
896             case 'E':
897             case 'f':
898             case 'F':
899             case 'g':
900             case 'G':
901                 if (width == -1 && prec == -1
902                     && !(flags & (F_SIGN | F_BLANK)))
903                 {
904                     /* Fast path */
905                     writer.min_size -= 2; /* size preallocated for "%f" */
906                     res = formatfloat(v, flags, prec, c, NULL, &writer, res);
907                     if (res == NULL)
908                         goto error;
909                     continue;
910                 }
911 
912                 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
913                     goto error;
914                 pbuf = PyBytes_AS_STRING(temp);
915                 len = PyBytes_GET_SIZE(temp);
916                 sign = 1;
917                 if (flags & F_ZERO)
918                     fill = '0';
919                 break;
920 
921             case 'c':
922                 pbuf = &onechar;
923                 len = byte_converter(v, &onechar);
924                 if (!len)
925                     goto error;
926                 if (width == -1) {
927                     /* Fast path */
928                     *res++ = onechar;
929                     continue;
930                 }
931                 break;
932 
933             default:
934                 PyErr_Format(PyExc_ValueError,
935                   "unsupported format character '%c' (0x%x) "
936                   "at index %zd",
937                   c, c,
938                   (Py_ssize_t)(fmt - 1 - format));
939                 goto error;
940             }
941 
942             if (sign) {
943                 if (*pbuf == '-' || *pbuf == '+') {
944                     sign = *pbuf++;
945                     len--;
946                 }
947                 else if (flags & F_SIGN)
948                     sign = '+';
949                 else if (flags & F_BLANK)
950                     sign = ' ';
951                 else
952                     sign = 0;
953             }
954             if (width < len)
955                 width = len;
956 
957             alloc = width;
958             if (sign != 0 && len == width)
959                 alloc++;
960             /* 2: size preallocated for %s */
961             if (alloc > 2) {
962                 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
963                 if (res == NULL)
964                     goto error;
965             }
966 #ifndef NDEBUG
967             char *before = res;
968 #endif
969 
970             /* Write the sign if needed */
971             if (sign) {
972                 if (fill != ' ')
973                     *res++ = sign;
974                 if (width > len)
975                     width--;
976             }
977 
978             /* Write the numeric prefix for "x", "X" and "o" formats
979                if the alternate form is used.
980                For example, write "0x" for the "%#x" format. */
981             if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
982                 assert(pbuf[0] == '0');
983                 assert(pbuf[1] == c);
984                 if (fill != ' ') {
985                     *res++ = *pbuf++;
986                     *res++ = *pbuf++;
987                 }
988                 width -= 2;
989                 if (width < 0)
990                     width = 0;
991                 len -= 2;
992             }
993 
994             /* Pad left with the fill character if needed */
995             if (width > len && !(flags & F_LJUST)) {
996                 memset(res, fill, width - len);
997                 res += (width - len);
998                 width = len;
999             }
1000 
1001             /* If padding with spaces: write sign if needed and/or numeric
1002                prefix if the alternate form is used */
1003             if (fill == ' ') {
1004                 if (sign)
1005                     *res++ = sign;
1006                 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1007                     assert(pbuf[0] == '0');
1008                     assert(pbuf[1] == c);
1009                     *res++ = *pbuf++;
1010                     *res++ = *pbuf++;
1011                 }
1012             }
1013 
1014             /* Copy bytes */
1015             memcpy(res, pbuf, len);
1016             res += len;
1017 
1018             /* Pad right with the fill character if needed */
1019             if (width > len) {
1020                 memset(res, ' ', width - len);
1021                 res += (width - len);
1022             }
1023 
1024             if (dict && (argidx < arglen)) {
1025                 PyErr_SetString(PyExc_TypeError,
1026                            "not all arguments converted during bytes formatting");
1027                 Py_XDECREF(temp);
1028                 goto error;
1029             }
1030             Py_XDECREF(temp);
1031 
1032 #ifndef NDEBUG
1033             /* check that we computed the exact size for this write */
1034             assert((res - before) == alloc);
1035 #endif
1036         } /* '%' */
1037 
1038         /* If overallocation was disabled, ensure that it was the last
1039            write. Otherwise, we missed an optimization */
1040         assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
1041     } /* until end */
1042 
1043     if (argidx < arglen && !dict) {
1044         PyErr_SetString(PyExc_TypeError,
1045                         "not all arguments converted during bytes formatting");
1046         goto error;
1047     }
1048 
1049     if (args_owned) {
1050         Py_DECREF(args);
1051     }
1052     return _PyBytesWriter_Finish(&writer, res);
1053 
1054  error:
1055     _PyBytesWriter_Dealloc(&writer);
1056     if (args_owned) {
1057         Py_DECREF(args);
1058     }
1059     return NULL;
1060 }
1061 
1062 /* Unescape a backslash-escaped string. */
_PyBytes_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,const char ** first_invalid_escape)1063 PyObject *_PyBytes_DecodeEscape(const char *s,
1064                                 Py_ssize_t len,
1065                                 const char *errors,
1066                                 const char **first_invalid_escape)
1067 {
1068     int c;
1069     char *p;
1070     const char *end;
1071     _PyBytesWriter writer;
1072 
1073     _PyBytesWriter_Init(&writer);
1074 
1075     p = _PyBytesWriter_Alloc(&writer, len);
1076     if (p == NULL)
1077         return NULL;
1078     writer.overallocate = 1;
1079 
1080     *first_invalid_escape = NULL;
1081 
1082     end = s + len;
1083     while (s < end) {
1084         if (*s != '\\') {
1085             *p++ = *s++;
1086             continue;
1087         }
1088 
1089         s++;
1090         if (s == end) {
1091             PyErr_SetString(PyExc_ValueError,
1092                             "Trailing \\ in string");
1093             goto failed;
1094         }
1095 
1096         switch (*s++) {
1097         /* XXX This assumes ASCII! */
1098         case '\n': break;
1099         case '\\': *p++ = '\\'; break;
1100         case '\'': *p++ = '\''; break;
1101         case '\"': *p++ = '\"'; break;
1102         case 'b': *p++ = '\b'; break;
1103         case 'f': *p++ = '\014'; break; /* FF */
1104         case 't': *p++ = '\t'; break;
1105         case 'n': *p++ = '\n'; break;
1106         case 'r': *p++ = '\r'; break;
1107         case 'v': *p++ = '\013'; break; /* VT */
1108         case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1109         case '0': case '1': case '2': case '3':
1110         case '4': case '5': case '6': case '7':
1111             c = s[-1] - '0';
1112             if (s < end && '0' <= *s && *s <= '7') {
1113                 c = (c<<3) + *s++ - '0';
1114                 if (s < end && '0' <= *s && *s <= '7')
1115                     c = (c<<3) + *s++ - '0';
1116             }
1117             *p++ = c;
1118             break;
1119         case 'x':
1120             if (s+1 < end) {
1121                 int digit1, digit2;
1122                 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1123                 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1124                 if (digit1 < 16 && digit2 < 16) {
1125                     *p++ = (unsigned char)((digit1 << 4) + digit2);
1126                     s += 2;
1127                     break;
1128                 }
1129             }
1130             /* invalid hexadecimal digits */
1131 
1132             if (!errors || strcmp(errors, "strict") == 0) {
1133                 PyErr_Format(PyExc_ValueError,
1134                              "invalid \\x escape at position %zd",
1135                              s - 2 - (end - len));
1136                 goto failed;
1137             }
1138             if (strcmp(errors, "replace") == 0) {
1139                 *p++ = '?';
1140             } else if (strcmp(errors, "ignore") == 0)
1141                 /* do nothing */;
1142             else {
1143                 PyErr_Format(PyExc_ValueError,
1144                              "decoding error; unknown "
1145                              "error handling code: %.400s",
1146                              errors);
1147                 goto failed;
1148             }
1149             /* skip \x */
1150             if (s < end && Py_ISXDIGIT(s[0]))
1151                 s++; /* and a hexdigit */
1152             break;
1153 
1154         default:
1155             if (*first_invalid_escape == NULL) {
1156                 *first_invalid_escape = s-1; /* Back up one char, since we've
1157                                                 already incremented s. */
1158             }
1159             *p++ = '\\';
1160             s--;
1161         }
1162     }
1163 
1164     return _PyBytesWriter_Finish(&writer, p);
1165 
1166   failed:
1167     _PyBytesWriter_Dealloc(&writer);
1168     return NULL;
1169 }
1170 
PyBytes_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,Py_ssize_t Py_UNUSED (unicode),const char * Py_UNUSED (recode_encoding))1171 PyObject *PyBytes_DecodeEscape(const char *s,
1172                                 Py_ssize_t len,
1173                                 const char *errors,
1174                                 Py_ssize_t Py_UNUSED(unicode),
1175                                 const char *Py_UNUSED(recode_encoding))
1176 {
1177     const char* first_invalid_escape;
1178     PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
1179                                              &first_invalid_escape);
1180     if (result == NULL)
1181         return NULL;
1182     if (first_invalid_escape != NULL) {
1183         if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1184                              "invalid escape sequence '\\%c'",
1185                              (unsigned char)*first_invalid_escape) < 0) {
1186             Py_DECREF(result);
1187             return NULL;
1188         }
1189     }
1190     return result;
1191 
1192 }
1193 /* -------------------------------------------------------------------- */
1194 /* object api */
1195 
1196 Py_ssize_t
PyBytes_Size(PyObject * op)1197 PyBytes_Size(PyObject *op)
1198 {
1199     if (!PyBytes_Check(op)) {
1200         PyErr_Format(PyExc_TypeError,
1201              "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1202         return -1;
1203     }
1204     return Py_SIZE(op);
1205 }
1206 
1207 char *
PyBytes_AsString(PyObject * op)1208 PyBytes_AsString(PyObject *op)
1209 {
1210     if (!PyBytes_Check(op)) {
1211         PyErr_Format(PyExc_TypeError,
1212              "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1213         return NULL;
1214     }
1215     return ((PyBytesObject *)op)->ob_sval;
1216 }
1217 
1218 int
PyBytes_AsStringAndSize(PyObject * obj,char ** s,Py_ssize_t * len)1219 PyBytes_AsStringAndSize(PyObject *obj,
1220                          char **s,
1221                          Py_ssize_t *len)
1222 {
1223     if (s == NULL) {
1224         PyErr_BadInternalCall();
1225         return -1;
1226     }
1227 
1228     if (!PyBytes_Check(obj)) {
1229         PyErr_Format(PyExc_TypeError,
1230              "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1231         return -1;
1232     }
1233 
1234     *s = PyBytes_AS_STRING(obj);
1235     if (len != NULL)
1236         *len = PyBytes_GET_SIZE(obj);
1237     else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1238         PyErr_SetString(PyExc_ValueError,
1239                         "embedded null byte");
1240         return -1;
1241     }
1242     return 0;
1243 }
1244 
1245 /* -------------------------------------------------------------------- */
1246 /* Methods */
1247 
1248 #include "stringlib/stringdefs.h"
1249 
1250 #include "stringlib/fastsearch.h"
1251 #include "stringlib/count.h"
1252 #include "stringlib/find.h"
1253 #include "stringlib/join.h"
1254 #include "stringlib/partition.h"
1255 #include "stringlib/split.h"
1256 #include "stringlib/ctype.h"
1257 
1258 #include "stringlib/transmogrify.h"
1259 
1260 PyObject *
PyBytes_Repr(PyObject * obj,int smartquotes)1261 PyBytes_Repr(PyObject *obj, int smartquotes)
1262 {
1263     PyBytesObject* op = (PyBytesObject*) obj;
1264     Py_ssize_t i, length = Py_SIZE(op);
1265     Py_ssize_t newsize, squotes, dquotes;
1266     PyObject *v;
1267     unsigned char quote;
1268     const unsigned char *s;
1269     Py_UCS1 *p;
1270 
1271     /* Compute size of output string */
1272     squotes = dquotes = 0;
1273     newsize = 3; /* b'' */
1274     s = (const unsigned char*)op->ob_sval;
1275     for (i = 0; i < length; i++) {
1276         Py_ssize_t incr = 1;
1277         switch(s[i]) {
1278         case '\'': squotes++; break;
1279         case '"':  dquotes++; break;
1280         case '\\': case '\t': case '\n': case '\r':
1281             incr = 2; break; /* \C */
1282         default:
1283             if (s[i] < ' ' || s[i] >= 0x7f)
1284                 incr = 4; /* \xHH */
1285         }
1286         if (newsize > PY_SSIZE_T_MAX - incr)
1287             goto overflow;
1288         newsize += incr;
1289     }
1290     quote = '\'';
1291     if (smartquotes && squotes && !dquotes)
1292         quote = '"';
1293     if (squotes && quote == '\'') {
1294         if (newsize > PY_SSIZE_T_MAX - squotes)
1295             goto overflow;
1296         newsize += squotes;
1297     }
1298 
1299     v = PyUnicode_New(newsize, 127);
1300     if (v == NULL) {
1301         return NULL;
1302     }
1303     p = PyUnicode_1BYTE_DATA(v);
1304 
1305     *p++ = 'b', *p++ = quote;
1306     for (i = 0; i < length; i++) {
1307         unsigned char c = op->ob_sval[i];
1308         if (c == quote || c == '\\')
1309             *p++ = '\\', *p++ = c;
1310         else if (c == '\t')
1311             *p++ = '\\', *p++ = 't';
1312         else if (c == '\n')
1313             *p++ = '\\', *p++ = 'n';
1314         else if (c == '\r')
1315             *p++ = '\\', *p++ = 'r';
1316         else if (c < ' ' || c >= 0x7f) {
1317             *p++ = '\\';
1318             *p++ = 'x';
1319             *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1320             *p++ = Py_hexdigits[c & 0xf];
1321         }
1322         else
1323             *p++ = c;
1324     }
1325     *p++ = quote;
1326     assert(_PyUnicode_CheckConsistency(v, 1));
1327     return v;
1328 
1329   overflow:
1330     PyErr_SetString(PyExc_OverflowError,
1331                     "bytes object is too large to make repr");
1332     return NULL;
1333 }
1334 
1335 static PyObject *
bytes_repr(PyObject * op)1336 bytes_repr(PyObject *op)
1337 {
1338     return PyBytes_Repr(op, 1);
1339 }
1340 
1341 static PyObject *
bytes_str(PyObject * op)1342 bytes_str(PyObject *op)
1343 {
1344     if (_Py_GetConfig()->bytes_warning) {
1345         if (PyErr_WarnEx(PyExc_BytesWarning,
1346                          "str() on a bytes instance", 1)) {
1347             return NULL;
1348         }
1349     }
1350     return bytes_repr(op);
1351 }
1352 
1353 static Py_ssize_t
bytes_length(PyBytesObject * a)1354 bytes_length(PyBytesObject *a)
1355 {
1356     return Py_SIZE(a);
1357 }
1358 
1359 /* This is also used by PyBytes_Concat() */
1360 static PyObject *
bytes_concat(PyObject * a,PyObject * b)1361 bytes_concat(PyObject *a, PyObject *b)
1362 {
1363     Py_buffer va, vb;
1364     PyObject *result = NULL;
1365 
1366     va.len = -1;
1367     vb.len = -1;
1368     if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1369         PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1370         PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1371                      Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
1372         goto done;
1373     }
1374 
1375     /* Optimize end cases */
1376     if (va.len == 0 && PyBytes_CheckExact(b)) {
1377         result = b;
1378         Py_INCREF(result);
1379         goto done;
1380     }
1381     if (vb.len == 0 && PyBytes_CheckExact(a)) {
1382         result = a;
1383         Py_INCREF(result);
1384         goto done;
1385     }
1386 
1387     if (va.len > PY_SSIZE_T_MAX - vb.len) {
1388         PyErr_NoMemory();
1389         goto done;
1390     }
1391 
1392     result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1393     if (result != NULL) {
1394         memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1395         memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1396     }
1397 
1398   done:
1399     if (va.len != -1)
1400         PyBuffer_Release(&va);
1401     if (vb.len != -1)
1402         PyBuffer_Release(&vb);
1403     return result;
1404 }
1405 
1406 static PyObject *
bytes_repeat(PyBytesObject * a,Py_ssize_t n)1407 bytes_repeat(PyBytesObject *a, Py_ssize_t n)
1408 {
1409     Py_ssize_t i;
1410     Py_ssize_t j;
1411     Py_ssize_t size;
1412     PyBytesObject *op;
1413     size_t nbytes;
1414     if (n < 0)
1415         n = 0;
1416     /* watch out for overflows:  the size can overflow int,
1417      * and the # of bytes needed can overflow size_t
1418      */
1419     if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1420         PyErr_SetString(PyExc_OverflowError,
1421             "repeated bytes are too long");
1422         return NULL;
1423     }
1424     size = Py_SIZE(a) * n;
1425     if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1426         Py_INCREF(a);
1427         return (PyObject *)a;
1428     }
1429     nbytes = (size_t)size;
1430     if (nbytes + PyBytesObject_SIZE <= nbytes) {
1431         PyErr_SetString(PyExc_OverflowError,
1432             "repeated bytes are too long");
1433         return NULL;
1434     }
1435     op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1436     if (op == NULL)
1437         return PyErr_NoMemory();
1438     (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
1439     op->ob_shash = -1;
1440     op->ob_sval[size] = '\0';
1441     if (Py_SIZE(a) == 1 && n > 0) {
1442         memset(op->ob_sval, a->ob_sval[0] , n);
1443         return (PyObject *) op;
1444     }
1445     i = 0;
1446     if (i < size) {
1447         memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
1448         i = Py_SIZE(a);
1449     }
1450     while (i < size) {
1451         j = (i <= size-i)  ?  i  :  size-i;
1452         memcpy(op->ob_sval+i, op->ob_sval, j);
1453         i += j;
1454     }
1455     return (PyObject *) op;
1456 }
1457 
1458 static int
bytes_contains(PyObject * self,PyObject * arg)1459 bytes_contains(PyObject *self, PyObject *arg)
1460 {
1461     return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1462 }
1463 
1464 static PyObject *
bytes_item(PyBytesObject * a,Py_ssize_t i)1465 bytes_item(PyBytesObject *a, Py_ssize_t i)
1466 {
1467     if (i < 0 || i >= Py_SIZE(a)) {
1468         PyErr_SetString(PyExc_IndexError, "index out of range");
1469         return NULL;
1470     }
1471     return PyLong_FromLong((unsigned char)a->ob_sval[i]);
1472 }
1473 
1474 static int
bytes_compare_eq(PyBytesObject * a,PyBytesObject * b)1475 bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1476 {
1477     int cmp;
1478     Py_ssize_t len;
1479 
1480     len = Py_SIZE(a);
1481     if (Py_SIZE(b) != len)
1482         return 0;
1483 
1484     if (a->ob_sval[0] != b->ob_sval[0])
1485         return 0;
1486 
1487     cmp = memcmp(a->ob_sval, b->ob_sval, len);
1488     return (cmp == 0);
1489 }
1490 
1491 static PyObject*
bytes_richcompare(PyBytesObject * a,PyBytesObject * b,int op)1492 bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
1493 {
1494     int c;
1495     Py_ssize_t len_a, len_b;
1496     Py_ssize_t min_len;
1497     int rc;
1498 
1499     /* Make sure both arguments are strings. */
1500     if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1501         if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
1502             rc = PyObject_IsInstance((PyObject*)a,
1503                                      (PyObject*)&PyUnicode_Type);
1504             if (!rc)
1505                 rc = PyObject_IsInstance((PyObject*)b,
1506                                          (PyObject*)&PyUnicode_Type);
1507             if (rc < 0)
1508                 return NULL;
1509             if (rc) {
1510                 if (PyErr_WarnEx(PyExc_BytesWarning,
1511                                  "Comparison between bytes and string", 1))
1512                     return NULL;
1513             }
1514             else {
1515                 rc = PyObject_IsInstance((PyObject*)a,
1516                                          (PyObject*)&PyLong_Type);
1517                 if (!rc)
1518                     rc = PyObject_IsInstance((PyObject*)b,
1519                                              (PyObject*)&PyLong_Type);
1520                 if (rc < 0)
1521                     return NULL;
1522                 if (rc) {
1523                     if (PyErr_WarnEx(PyExc_BytesWarning,
1524                                      "Comparison between bytes and int", 1))
1525                         return NULL;
1526                 }
1527             }
1528         }
1529         Py_RETURN_NOTIMPLEMENTED;
1530     }
1531     else if (a == b) {
1532         switch (op) {
1533         case Py_EQ:
1534         case Py_LE:
1535         case Py_GE:
1536             /* a string is equal to itself */
1537             Py_RETURN_TRUE;
1538         case Py_NE:
1539         case Py_LT:
1540         case Py_GT:
1541             Py_RETURN_FALSE;
1542         default:
1543             PyErr_BadArgument();
1544             return NULL;
1545         }
1546     }
1547     else if (op == Py_EQ || op == Py_NE) {
1548         int eq = bytes_compare_eq(a, b);
1549         eq ^= (op == Py_NE);
1550         return PyBool_FromLong(eq);
1551     }
1552     else {
1553         len_a = Py_SIZE(a);
1554         len_b = Py_SIZE(b);
1555         min_len = Py_MIN(len_a, len_b);
1556         if (min_len > 0) {
1557             c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1558             if (c == 0)
1559                 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1560         }
1561         else
1562             c = 0;
1563         if (c != 0)
1564             Py_RETURN_RICHCOMPARE(c, 0, op);
1565         Py_RETURN_RICHCOMPARE(len_a, len_b, op);
1566     }
1567 }
1568 
1569 static Py_hash_t
bytes_hash(PyBytesObject * a)1570 bytes_hash(PyBytesObject *a)
1571 {
1572     if (a->ob_shash == -1) {
1573         /* Can't fail */
1574         a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
1575     }
1576     return a->ob_shash;
1577 }
1578 
1579 static PyObject*
bytes_subscript(PyBytesObject * self,PyObject * item)1580 bytes_subscript(PyBytesObject* self, PyObject* item)
1581 {
1582     if (_PyIndex_Check(item)) {
1583         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1584         if (i == -1 && PyErr_Occurred())
1585             return NULL;
1586         if (i < 0)
1587             i += PyBytes_GET_SIZE(self);
1588         if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1589             PyErr_SetString(PyExc_IndexError,
1590                             "index out of range");
1591             return NULL;
1592         }
1593         return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1594     }
1595     else if (PySlice_Check(item)) {
1596         Py_ssize_t start, stop, step, slicelength, i;
1597         size_t cur;
1598         const char* source_buf;
1599         char* result_buf;
1600         PyObject* result;
1601 
1602         if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1603             return NULL;
1604         }
1605         slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1606                                             &stop, step);
1607 
1608         if (slicelength <= 0) {
1609             return PyBytes_FromStringAndSize("", 0);
1610         }
1611         else if (start == 0 && step == 1 &&
1612                  slicelength == PyBytes_GET_SIZE(self) &&
1613                  PyBytes_CheckExact(self)) {
1614             Py_INCREF(self);
1615             return (PyObject *)self;
1616         }
1617         else if (step == 1) {
1618             return PyBytes_FromStringAndSize(
1619                 PyBytes_AS_STRING(self) + start,
1620                 slicelength);
1621         }
1622         else {
1623             source_buf = PyBytes_AS_STRING(self);
1624             result = PyBytes_FromStringAndSize(NULL, slicelength);
1625             if (result == NULL)
1626                 return NULL;
1627 
1628             result_buf = PyBytes_AS_STRING(result);
1629             for (cur = start, i = 0; i < slicelength;
1630                  cur += step, i++) {
1631                 result_buf[i] = source_buf[cur];
1632             }
1633 
1634             return result;
1635         }
1636     }
1637     else {
1638         PyErr_Format(PyExc_TypeError,
1639                      "byte indices must be integers or slices, not %.200s",
1640                      Py_TYPE(item)->tp_name);
1641         return NULL;
1642     }
1643 }
1644 
1645 static int
bytes_buffer_getbuffer(PyBytesObject * self,Py_buffer * view,int flags)1646 bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
1647 {
1648     return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1649                              1, flags);
1650 }
1651 
1652 static PySequenceMethods bytes_as_sequence = {
1653     (lenfunc)bytes_length, /*sq_length*/
1654     (binaryfunc)bytes_concat, /*sq_concat*/
1655     (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1656     (ssizeargfunc)bytes_item, /*sq_item*/
1657     0,                  /*sq_slice*/
1658     0,                  /*sq_ass_item*/
1659     0,                  /*sq_ass_slice*/
1660     (objobjproc)bytes_contains /*sq_contains*/
1661 };
1662 
1663 static PyMappingMethods bytes_as_mapping = {
1664     (lenfunc)bytes_length,
1665     (binaryfunc)bytes_subscript,
1666     0,
1667 };
1668 
1669 static PyBufferProcs bytes_as_buffer = {
1670     (getbufferproc)bytes_buffer_getbuffer,
1671     NULL,
1672 };
1673 
1674 
/* Values for the `striptype` argument of the strip helpers.  do_xstrip()
   trims the left end unless given RIGHTSTRIP and the right end unless
   given LEFTSTRIP, so BOTHSTRIP trims both ends. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1678 
1679 /*[clinic input]
1680 bytes.split
1681 
1682     sep: object = None
1683         The delimiter according which to split the bytes.
1684         None (the default value) means split on ASCII whitespace characters
1685         (space, tab, return, newline, formfeed, vertical tab).
1686     maxsplit: Py_ssize_t = -1
1687         Maximum number of splits to do.
1688         -1 (the default value) means no limit.
1689 
1690 Return a list of the sections in the bytes, using sep as the delimiter.
1691 [clinic start generated code]*/
1692 
1693 static PyObject *
bytes_split_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1694 bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1695 /*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1696 {
1697     Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1698     const char *s = PyBytes_AS_STRING(self), *sub;
1699     Py_buffer vsub;
1700     PyObject *list;
1701 
1702     if (maxsplit < 0)
1703         maxsplit = PY_SSIZE_T_MAX;
1704     if (sep == Py_None)
1705         return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1706     if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1707         return NULL;
1708     sub = vsub.buf;
1709     n = vsub.len;
1710 
1711     list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1712     PyBuffer_Release(&vsub);
1713     return list;
1714 }
1715 
1716 /*[clinic input]
1717 bytes.partition
1718 
1719     sep: Py_buffer
1720     /
1721 
1722 Partition the bytes into three parts using the given separator.
1723 
1724 This will search for the separator sep in the bytes. If the separator is found,
1725 returns a 3-tuple containing the part before the separator, the separator
1726 itself, and the part after it.
1727 
1728 If the separator is not found, returns a 3-tuple containing the original bytes
1729 object and two empty bytes objects.
1730 [clinic start generated code]*/
1731 
1732 static PyObject *
bytes_partition_impl(PyBytesObject * self,Py_buffer * sep)1733 bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1734 /*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
1735 {
1736     return stringlib_partition(
1737         (PyObject*) self,
1738         PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1739         sep->obj, (const char *)sep->buf, sep->len
1740         );
1741 }
1742 
1743 /*[clinic input]
1744 bytes.rpartition
1745 
1746     sep: Py_buffer
1747     /
1748 
1749 Partition the bytes into three parts using the given separator.
1750 
1751 This will search for the separator sep in the bytes, starting at the end. If
1752 the separator is found, returns a 3-tuple containing the part before the
1753 separator, the separator itself, and the part after it.
1754 
1755 If the separator is not found, returns a 3-tuple containing two empty bytes
1756 objects and the original bytes object.
1757 [clinic start generated code]*/
1758 
1759 static PyObject *
bytes_rpartition_impl(PyBytesObject * self,Py_buffer * sep)1760 bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1761 /*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
1762 {
1763     return stringlib_rpartition(
1764         (PyObject*) self,
1765         PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1766         sep->obj, (const char *)sep->buf, sep->len
1767         );
1768 }
1769 
1770 /*[clinic input]
1771 bytes.rsplit = bytes.split
1772 
1773 Return a list of the sections in the bytes, using sep as the delimiter.
1774 
1775 Splitting is done starting at the end of the bytes and working to the front.
1776 [clinic start generated code]*/
1777 
1778 static PyObject *
bytes_rsplit_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1779 bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1780 /*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
1781 {
1782     Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1783     const char *s = PyBytes_AS_STRING(self), *sub;
1784     Py_buffer vsub;
1785     PyObject *list;
1786 
1787     if (maxsplit < 0)
1788         maxsplit = PY_SSIZE_T_MAX;
1789     if (sep == Py_None)
1790         return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1791     if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1792         return NULL;
1793     sub = vsub.buf;
1794     n = vsub.len;
1795 
1796     list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1797     PyBuffer_Release(&vsub);
1798     return list;
1799 }
1800 
1801 
1802 /*[clinic input]
1803 bytes.join
1804 
1805     iterable_of_bytes: object
1806     /
1807 
1808 Concatenate any number of bytes objects.
1809 
1810 The bytes whose method is called is inserted in between each pair.
1811 
1812 The result is returned as a new bytes object.
1813 
1814 Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1815 [clinic start generated code]*/
1816 
1817 static PyObject *
bytes_join(PyBytesObject * self,PyObject * iterable_of_bytes)1818 bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1819 /*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
1820 {
1821     return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1822 }
1823 
1824 PyObject *
_PyBytes_Join(PyObject * sep,PyObject * x)1825 _PyBytes_Join(PyObject *sep, PyObject *x)
1826 {
1827     assert(sep != NULL && PyBytes_Check(sep));
1828     assert(x != NULL);
1829     return bytes_join((PyBytesObject*)sep, x);
1830 }
1831 
1832 static PyObject *
bytes_find(PyBytesObject * self,PyObject * args)1833 bytes_find(PyBytesObject *self, PyObject *args)
1834 {
1835     return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1836 }
1837 
1838 static PyObject *
bytes_index(PyBytesObject * self,PyObject * args)1839 bytes_index(PyBytesObject *self, PyObject *args)
1840 {
1841     return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1842 }
1843 
1844 
1845 static PyObject *
bytes_rfind(PyBytesObject * self,PyObject * args)1846 bytes_rfind(PyBytesObject *self, PyObject *args)
1847 {
1848     return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1849 }
1850 
1851 
1852 static PyObject *
bytes_rindex(PyBytesObject * self,PyObject * args)1853 bytes_rindex(PyBytesObject *self, PyObject *args)
1854 {
1855     return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1856 }
1857 
1858 
1859 Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject * self,int striptype,PyObject * sepobj)1860 do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
1861 {
1862     Py_buffer vsep;
1863     const char *s = PyBytes_AS_STRING(self);
1864     Py_ssize_t len = PyBytes_GET_SIZE(self);
1865     char *sep;
1866     Py_ssize_t seplen;
1867     Py_ssize_t i, j;
1868 
1869     if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
1870         return NULL;
1871     sep = vsep.buf;
1872     seplen = vsep.len;
1873 
1874     i = 0;
1875     if (striptype != RIGHTSTRIP) {
1876         while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1877             i++;
1878         }
1879     }
1880 
1881     j = len;
1882     if (striptype != LEFTSTRIP) {
1883         do {
1884             j--;
1885         } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1886         j++;
1887     }
1888 
1889     PyBuffer_Release(&vsep);
1890 
1891     if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1892         Py_INCREF(self);
1893         return (PyObject*)self;
1894     }
1895     else
1896         return PyBytes_FromStringAndSize(s+i, j-i);
1897 }
1898 
1899 
1900 Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject * self,int striptype)1901 do_strip(PyBytesObject *self, int striptype)
1902 {
1903     const char *s = PyBytes_AS_STRING(self);
1904     Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1905 
1906     i = 0;
1907     if (striptype != RIGHTSTRIP) {
1908         while (i < len && Py_ISSPACE(s[i])) {
1909             i++;
1910         }
1911     }
1912 
1913     j = len;
1914     if (striptype != LEFTSTRIP) {
1915         do {
1916             j--;
1917         } while (j >= i && Py_ISSPACE(s[j]));
1918         j++;
1919     }
1920 
1921     if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1922         Py_INCREF(self);
1923         return (PyObject*)self;
1924     }
1925     else
1926         return PyBytes_FromStringAndSize(s+i, j-i);
1927 }
1928 
1929 
1930 Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject * self,int striptype,PyObject * bytes)1931 do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
1932 {
1933     if (bytes != Py_None) {
1934         return do_xstrip(self, striptype, bytes);
1935     }
1936     return do_strip(self, striptype);
1937 }
1938 
1939 /*[clinic input]
1940 bytes.strip
1941 
1942     bytes: object = None
1943     /
1944 
1945 Strip leading and trailing bytes contained in the argument.
1946 
1947 If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1948 [clinic start generated code]*/
1949 
1950 static PyObject *
bytes_strip_impl(PyBytesObject * self,PyObject * bytes)1951 bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
1952 /*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
1953 {
1954     return do_argstrip(self, BOTHSTRIP, bytes);
1955 }
1956 
1957 /*[clinic input]
1958 bytes.lstrip
1959 
1960     bytes: object = None
1961     /
1962 
1963 Strip leading bytes contained in the argument.
1964 
1965 If the argument is omitted or None, strip leading  ASCII whitespace.
1966 [clinic start generated code]*/
1967 
1968 static PyObject *
bytes_lstrip_impl(PyBytesObject * self,PyObject * bytes)1969 bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
1970 /*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
1971 {
1972     return do_argstrip(self, LEFTSTRIP, bytes);
1973 }
1974 
1975 /*[clinic input]
1976 bytes.rstrip
1977 
1978     bytes: object = None
1979     /
1980 
1981 Strip trailing bytes contained in the argument.
1982 
1983 If the argument is omitted or None, strip trailing ASCII whitespace.
1984 [clinic start generated code]*/
1985 
1986 static PyObject *
bytes_rstrip_impl(PyBytesObject * self,PyObject * bytes)1987 bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
1988 /*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
1989 {
1990     return do_argstrip(self, RIGHTSTRIP, bytes);
1991 }
1992 
1993 
1994 static PyObject *
bytes_count(PyBytesObject * self,PyObject * args)1995 bytes_count(PyBytesObject *self, PyObject *args)
1996 {
1997     return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1998 }
1999 
2000 
2001 /*[clinic input]
2002 bytes.translate
2003 
2004     table: object
2005         Translation table, which must be a bytes object of length 256.
2006     /
2007     delete as deletechars: object(c_default="NULL") = b''
2008 
2009 Return a copy with each character mapped by the given translation table.
2010 
2011 All characters occurring in the optional argument delete are removed.
2012 The remaining characters are mapped through the given translation table.
2013 [clinic start generated code]*/
2014 
2015 static PyObject *
bytes_translate_impl(PyBytesObject * self,PyObject * table,PyObject * deletechars)2016 bytes_translate_impl(PyBytesObject *self, PyObject *table,
2017                      PyObject *deletechars)
2018 /*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
2019 {
2020     const char *input;
2021     char *output;
2022     Py_buffer table_view = {NULL, NULL};
2023     Py_buffer del_table_view = {NULL, NULL};
2024     const char *table_chars;
2025     Py_ssize_t i, c, changed = 0;
2026     PyObject *input_obj = (PyObject*)self;
2027     const char *output_start, *del_table_chars=NULL;
2028     Py_ssize_t inlen, tablen, dellen = 0;
2029     PyObject *result;
2030     int trans_table[256];
2031 
2032     if (PyBytes_Check(table)) {
2033         table_chars = PyBytes_AS_STRING(table);
2034         tablen = PyBytes_GET_SIZE(table);
2035     }
2036     else if (table == Py_None) {
2037         table_chars = NULL;
2038         tablen = 256;
2039     }
2040     else {
2041         if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
2042             return NULL;
2043         table_chars = table_view.buf;
2044         tablen = table_view.len;
2045     }
2046 
2047     if (tablen != 256) {
2048         PyErr_SetString(PyExc_ValueError,
2049           "translation table must be 256 characters long");
2050         PyBuffer_Release(&table_view);
2051         return NULL;
2052     }
2053 
2054     if (deletechars != NULL) {
2055         if (PyBytes_Check(deletechars)) {
2056             del_table_chars = PyBytes_AS_STRING(deletechars);
2057             dellen = PyBytes_GET_SIZE(deletechars);
2058         }
2059         else {
2060             if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
2061                 PyBuffer_Release(&table_view);
2062                 return NULL;
2063             }
2064             del_table_chars = del_table_view.buf;
2065             dellen = del_table_view.len;
2066         }
2067     }
2068     else {
2069         del_table_chars = NULL;
2070         dellen = 0;
2071     }
2072 
2073     inlen = PyBytes_GET_SIZE(input_obj);
2074     result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2075     if (result == NULL) {
2076         PyBuffer_Release(&del_table_view);
2077         PyBuffer_Release(&table_view);
2078         return NULL;
2079     }
2080     output_start = output = PyBytes_AS_STRING(result);
2081     input = PyBytes_AS_STRING(input_obj);
2082 
2083     if (dellen == 0 && table_chars != NULL) {
2084         /* If no deletions are required, use faster code */
2085         for (i = inlen; --i >= 0; ) {
2086             c = Py_CHARMASK(*input++);
2087             if (Py_CHARMASK((*output++ = table_chars[c])) != c)
2088                 changed = 1;
2089         }
2090         if (!changed && PyBytes_CheckExact(input_obj)) {
2091             Py_INCREF(input_obj);
2092             Py_DECREF(result);
2093             result = input_obj;
2094         }
2095         PyBuffer_Release(&del_table_view);
2096         PyBuffer_Release(&table_view);
2097         return result;
2098     }
2099 
2100     if (table_chars == NULL) {
2101         for (i = 0; i < 256; i++)
2102             trans_table[i] = Py_CHARMASK(i);
2103     } else {
2104         for (i = 0; i < 256; i++)
2105             trans_table[i] = Py_CHARMASK(table_chars[i]);
2106     }
2107     PyBuffer_Release(&table_view);
2108 
2109     for (i = 0; i < dellen; i++)
2110         trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
2111     PyBuffer_Release(&del_table_view);
2112 
2113     for (i = inlen; --i >= 0; ) {
2114         c = Py_CHARMASK(*input++);
2115         if (trans_table[c] != -1)
2116             if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2117                 continue;
2118         changed = 1;
2119     }
2120     if (!changed && PyBytes_CheckExact(input_obj)) {
2121         Py_DECREF(result);
2122         Py_INCREF(input_obj);
2123         return input_obj;
2124     }
2125     /* Fix the size of the resulting string */
2126     if (inlen > 0)
2127         _PyBytes_Resize(&result, output - output_start);
2128     return result;
2129 }
2130 
2131 
2132 /*[clinic input]
2133 
2134 @staticmethod
2135 bytes.maketrans
2136 
2137     frm: Py_buffer
2138     to: Py_buffer
2139     /
2140 
2141 Return a translation table useable for the bytes or bytearray translate method.
2142 
2143 The returned table will be one where each byte in frm is mapped to the byte at
2144 the same position in to.
2145 
2146 The bytes objects frm and to must be of the same length.
2147 [clinic start generated code]*/
2148 
2149 static PyObject *
bytes_maketrans_impl(Py_buffer * frm,Py_buffer * to)2150 bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2151 /*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
2152 {
2153     return _Py_bytes_maketrans(frm, to);
2154 }
2155 
2156 
2157 /*[clinic input]
2158 bytes.replace
2159 
2160     old: Py_buffer
2161     new: Py_buffer
2162     count: Py_ssize_t = -1
2163         Maximum number of occurrences to replace.
2164         -1 (the default value) means replace all occurrences.
2165     /
2166 
2167 Return a copy with all occurrences of substring old replaced by new.
2168 
2169 If the optional argument count is given, only the first count occurrences are
2170 replaced.
2171 [clinic start generated code]*/
2172 
2173 static PyObject *
bytes_replace_impl(PyBytesObject * self,Py_buffer * old,Py_buffer * new,Py_ssize_t count)2174 bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2175                    Py_ssize_t count)
2176 /*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
2177 {
2178     return stringlib_replace((PyObject *)self,
2179                              (const char *)old->buf, old->len,
2180                              (const char *)new->buf, new->len, count);
2181 }
2182 
2183 /** End DALKE **/
2184 
2185 /*[clinic input]
2186 bytes.removeprefix as bytes_removeprefix
2187 
2188     prefix: Py_buffer
2189     /
2190 
2191 Return a bytes object with the given prefix string removed if present.
2192 
2193 If the bytes starts with the prefix string, return bytes[len(prefix):].
2194 Otherwise, return a copy of the original bytes.
2195 [clinic start generated code]*/
2196 
2197 static PyObject *
bytes_removeprefix_impl(PyBytesObject * self,Py_buffer * prefix)2198 bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2199 /*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2200 {
2201     const char *self_start = PyBytes_AS_STRING(self);
2202     Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2203     const char *prefix_start = prefix->buf;
2204     Py_ssize_t prefix_len = prefix->len;
2205 
2206     if (self_len >= prefix_len
2207         && prefix_len > 0
2208         && memcmp(self_start, prefix_start, prefix_len) == 0)
2209     {
2210         return PyBytes_FromStringAndSize(self_start + prefix_len,
2211                                          self_len - prefix_len);
2212     }
2213 
2214     if (PyBytes_CheckExact(self)) {
2215         Py_INCREF(self);
2216         return (PyObject *)self;
2217     }
2218 
2219     return PyBytes_FromStringAndSize(self_start, self_len);
2220 }
2221 
2222 /*[clinic input]
2223 bytes.removesuffix as bytes_removesuffix
2224 
2225     suffix: Py_buffer
2226     /
2227 
2228 Return a bytes object with the given suffix string removed if present.
2229 
2230 If the bytes ends with the suffix string and that suffix is not empty,
2231 return bytes[:-len(prefix)].  Otherwise, return a copy of the original
2232 bytes.
2233 [clinic start generated code]*/
2234 
2235 static PyObject *
bytes_removesuffix_impl(PyBytesObject * self,Py_buffer * suffix)2236 bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2237 /*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2238 {
2239     const char *self_start = PyBytes_AS_STRING(self);
2240     Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2241     const char *suffix_start = suffix->buf;
2242     Py_ssize_t suffix_len = suffix->len;
2243 
2244     if (self_len >= suffix_len
2245         && suffix_len > 0
2246         && memcmp(self_start + self_len - suffix_len,
2247                   suffix_start, suffix_len) == 0)
2248     {
2249         return PyBytes_FromStringAndSize(self_start,
2250                                          self_len - suffix_len);
2251     }
2252 
2253     if (PyBytes_CheckExact(self)) {
2254         Py_INCREF(self);
2255         return (PyObject *)self;
2256     }
2257 
2258     return PyBytes_FromStringAndSize(self_start, self_len);
2259 }
2260 
2261 static PyObject *
bytes_startswith(PyBytesObject * self,PyObject * args)2262 bytes_startswith(PyBytesObject *self, PyObject *args)
2263 {
2264     return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2265 }
2266 
2267 static PyObject *
bytes_endswith(PyBytesObject * self,PyObject * args)2268 bytes_endswith(PyBytesObject *self, PyObject *args)
2269 {
2270     return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2271 }
2272 
2273 
2274 /*[clinic input]
2275 bytes.decode
2276 
2277     encoding: str(c_default="NULL") = 'utf-8'
2278         The encoding with which to decode the bytes.
2279     errors: str(c_default="NULL") = 'strict'
2280         The error handling scheme to use for the handling of decoding errors.
2281         The default is 'strict' meaning that decoding errors raise a
2282         UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2283         as well as any other name registered with codecs.register_error that
2284         can handle UnicodeDecodeErrors.
2285 
2286 Decode the bytes using the codec registered for encoding.
2287 [clinic start generated code]*/
2288 
2289 static PyObject *
bytes_decode_impl(PyBytesObject * self,const char * encoding,const char * errors)2290 bytes_decode_impl(PyBytesObject *self, const char *encoding,
2291                   const char *errors)
2292 /*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2293 {
2294     return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2295 }
2296 
2297 
2298 /*[clinic input]
2299 bytes.splitlines
2300 
2301     keepends: bool(accept={int}) = False
2302 
2303 Return a list of the lines in the bytes, breaking at line boundaries.
2304 
2305 Line breaks are not included in the resulting list unless keepends is given and
2306 true.
2307 [clinic start generated code]*/
2308 
2309 static PyObject *
bytes_splitlines_impl(PyBytesObject * self,int keepends)2310 bytes_splitlines_impl(PyBytesObject *self, int keepends)
2311 /*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
2312 {
2313     return stringlib_splitlines(
2314         (PyObject*) self, PyBytes_AS_STRING(self),
2315         PyBytes_GET_SIZE(self), keepends
2316         );
2317 }
2318 
2319 /*[clinic input]
2320 @classmethod
2321 bytes.fromhex
2322 
2323     string: unicode
2324     /
2325 
2326 Create a bytes object from a string of hexadecimal numbers.
2327 
2328 Spaces between two numbers are accepted.
2329 Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2330 [clinic start generated code]*/
2331 
2332 static PyObject *
bytes_fromhex_impl(PyTypeObject * type,PyObject * string)2333 bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2334 /*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
2335 {
2336     PyObject *result = _PyBytes_FromHex(string, 0);
2337     if (type != &PyBytes_Type && result != NULL) {
2338         Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
2339     }
2340     return result;
2341 }
2342 
2343 PyObject*
_PyBytes_FromHex(PyObject * string,int use_bytearray)2344 _PyBytes_FromHex(PyObject *string, int use_bytearray)
2345 {
2346     char *buf;
2347     Py_ssize_t hexlen, invalid_char;
2348     unsigned int top, bot;
2349     const Py_UCS1 *str, *end;
2350     _PyBytesWriter writer;
2351 
2352     _PyBytesWriter_Init(&writer);
2353     writer.use_bytearray = use_bytearray;
2354 
2355     assert(PyUnicode_Check(string));
2356     if (PyUnicode_READY(string))
2357         return NULL;
2358     hexlen = PyUnicode_GET_LENGTH(string);
2359 
2360     if (!PyUnicode_IS_ASCII(string)) {
2361         const void *data = PyUnicode_DATA(string);
2362         unsigned int kind = PyUnicode_KIND(string);
2363         Py_ssize_t i;
2364 
2365         /* search for the first non-ASCII character */
2366         for (i = 0; i < hexlen; i++) {
2367             if (PyUnicode_READ(kind, data, i) >= 128)
2368                 break;
2369         }
2370         invalid_char = i;
2371         goto error;
2372     }
2373 
2374     assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2375     str = PyUnicode_1BYTE_DATA(string);
2376 
2377     /* This overestimates if there are spaces */
2378     buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2379     if (buf == NULL)
2380         return NULL;
2381 
2382     end = str + hexlen;
2383     while (str < end) {
2384         /* skip over spaces in the input */
2385         if (Py_ISSPACE(*str)) {
2386             do {
2387                 str++;
2388             } while (Py_ISSPACE(*str));
2389             if (str >= end)
2390                 break;
2391         }
2392 
2393         top = _PyLong_DigitValue[*str];
2394         if (top >= 16) {
2395             invalid_char = str - PyUnicode_1BYTE_DATA(string);
2396             goto error;
2397         }
2398         str++;
2399 
2400         bot = _PyLong_DigitValue[*str];
2401         if (bot >= 16) {
2402             invalid_char = str - PyUnicode_1BYTE_DATA(string);
2403             goto error;
2404         }
2405         str++;
2406 
2407         *buf++ = (unsigned char)((top << 4) + bot);
2408     }
2409 
2410     return _PyBytesWriter_Finish(&writer, buf);
2411 
2412   error:
2413     PyErr_Format(PyExc_ValueError,
2414                  "non-hexadecimal number found in "
2415                  "fromhex() arg at position %zd", invalid_char);
2416     _PyBytesWriter_Dealloc(&writer);
2417     return NULL;
2418 }
2419 
2420 /*[clinic input]
2421 bytes.hex
2422 
2423     sep: object = NULL
2424         An optional single character or byte to separate hex bytes.
2425     bytes_per_sep: int = 1
2426         How many bytes between separators.  Positive values count from the
2427         right, negative values count from the left.
2428 
2429 Create a str of hexadecimal numbers from a bytes object.
2430 
2431 Example:
2432 >>> value = b'\xb9\x01\xef'
2433 >>> value.hex()
2434 'b901ef'
2435 >>> value.hex(':')
2436 'b9:01:ef'
2437 >>> value.hex(':', 2)
2438 'b9:01ef'
2439 >>> value.hex(':', -2)
2440 'b901:ef'
2441 [clinic start generated code]*/
2442 
2443 static PyObject *
bytes_hex_impl(PyBytesObject * self,PyObject * sep,int bytes_per_sep)2444 bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2445 /*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/
2446 {
2447     const char *argbuf = PyBytes_AS_STRING(self);
2448     Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2449     return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
2450 }
2451 
2452 static PyObject *
bytes_getnewargs(PyBytesObject * v,PyObject * Py_UNUSED (ignored))2453 bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
2454 {
2455     return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
2456 }
2457 
2458 
2459 static PyMethodDef
2460 bytes_methods[] = {
2461     {"__getnewargs__",          (PyCFunction)bytes_getnewargs,  METH_NOARGS},
2462     {"capitalize", stringlib_capitalize, METH_NOARGS,
2463      _Py_capitalize__doc__},
2464     STRINGLIB_CENTER_METHODDEF
2465     {"count", (PyCFunction)bytes_count, METH_VARARGS,
2466      _Py_count__doc__},
2467     BYTES_DECODE_METHODDEF
2468     {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2469      _Py_endswith__doc__},
2470     STRINGLIB_EXPANDTABS_METHODDEF
2471     {"find", (PyCFunction)bytes_find, METH_VARARGS,
2472      _Py_find__doc__},
2473     BYTES_FROMHEX_METHODDEF
2474     BYTES_HEX_METHODDEF
2475     {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
2476     {"isalnum", stringlib_isalnum, METH_NOARGS,
2477      _Py_isalnum__doc__},
2478     {"isalpha", stringlib_isalpha, METH_NOARGS,
2479      _Py_isalpha__doc__},
2480     {"isascii", stringlib_isascii, METH_NOARGS,
2481      _Py_isascii__doc__},
2482     {"isdigit", stringlib_isdigit, METH_NOARGS,
2483      _Py_isdigit__doc__},
2484     {"islower", stringlib_islower, METH_NOARGS,
2485      _Py_islower__doc__},
2486     {"isspace", stringlib_isspace, METH_NOARGS,
2487      _Py_isspace__doc__},
2488     {"istitle", stringlib_istitle, METH_NOARGS,
2489      _Py_istitle__doc__},
2490     {"isupper", stringlib_isupper, METH_NOARGS,
2491      _Py_isupper__doc__},
2492     BYTES_JOIN_METHODDEF
2493     STRINGLIB_LJUST_METHODDEF
2494     {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2495     BYTES_LSTRIP_METHODDEF
2496     BYTES_MAKETRANS_METHODDEF
2497     BYTES_PARTITION_METHODDEF
2498     BYTES_REPLACE_METHODDEF
2499     BYTES_REMOVEPREFIX_METHODDEF
2500     BYTES_REMOVESUFFIX_METHODDEF
2501     {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2502     {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
2503     STRINGLIB_RJUST_METHODDEF
2504     BYTES_RPARTITION_METHODDEF
2505     BYTES_RSPLIT_METHODDEF
2506     BYTES_RSTRIP_METHODDEF
2507     BYTES_SPLIT_METHODDEF
2508     BYTES_SPLITLINES_METHODDEF
2509     {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2510      _Py_startswith__doc__},
2511     BYTES_STRIP_METHODDEF
2512     {"swapcase", stringlib_swapcase, METH_NOARGS,
2513      _Py_swapcase__doc__},
2514     {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
2515     BYTES_TRANSLATE_METHODDEF
2516     {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2517     STRINGLIB_ZFILL_METHODDEF
2518     {NULL,     NULL}                         /* sentinel */
2519 };
2520 
2521 static PyObject *
bytes_mod(PyObject * self,PyObject * arg)2522 bytes_mod(PyObject *self, PyObject *arg)
2523 {
2524     if (!PyBytes_Check(self)) {
2525         Py_RETURN_NOTIMPLEMENTED;
2526     }
2527     return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2528                              arg, 0);
2529 }
2530 
2531 static PyNumberMethods bytes_as_number = {
2532     0,              /*nb_add*/
2533     0,              /*nb_subtract*/
2534     0,              /*nb_multiply*/
2535     bytes_mod,      /*nb_remainder*/
2536 };
2537 
2538 static PyObject *
2539 bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2540 
2541 static PyObject *
bytes_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2542 bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2543 {
2544     PyObject *x = NULL;
2545     const char *encoding = NULL;
2546     const char *errors = NULL;
2547     PyObject *new = NULL;
2548     PyObject *func;
2549     Py_ssize_t size;
2550     static char *kwlist[] = {"source", "encoding", "errors", 0};
2551 
2552     if (type != &PyBytes_Type)
2553         return bytes_subtype_new(type, args, kwds);
2554     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2555                                      &encoding, &errors))
2556         return NULL;
2557     if (x == NULL) {
2558         if (encoding != NULL || errors != NULL) {
2559             PyErr_SetString(PyExc_TypeError,
2560                             encoding != NULL ?
2561                             "encoding without a string argument" :
2562                             "errors without a string argument");
2563             return NULL;
2564         }
2565         return PyBytes_FromStringAndSize(NULL, 0);
2566     }
2567 
2568     if (encoding != NULL) {
2569         /* Encode via the codec registry */
2570         if (!PyUnicode_Check(x)) {
2571             PyErr_SetString(PyExc_TypeError,
2572                             "encoding without a string argument");
2573             return NULL;
2574         }
2575         new = PyUnicode_AsEncodedString(x, encoding, errors);
2576         if (new == NULL)
2577             return NULL;
2578         assert(PyBytes_Check(new));
2579         return new;
2580     }
2581 
2582     if (errors != NULL) {
2583         PyErr_SetString(PyExc_TypeError,
2584                         PyUnicode_Check(x) ?
2585                         "string argument without an encoding" :
2586                         "errors without a string argument");
2587         return NULL;
2588     }
2589 
2590     /* We'd like to call PyObject_Bytes here, but we need to check for an
2591        integer argument before deferring to PyBytes_FromObject, something
2592        PyObject_Bytes doesn't do. */
2593     func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2594     if (func != NULL) {
2595         new = _PyObject_CallNoArg(func);
2596         Py_DECREF(func);
2597         if (new == NULL)
2598             return NULL;
2599         if (!PyBytes_Check(new)) {
2600             PyErr_Format(PyExc_TypeError,
2601                          "__bytes__ returned non-bytes (type %.200s)",
2602                          Py_TYPE(new)->tp_name);
2603             Py_DECREF(new);
2604             return NULL;
2605         }
2606         return new;
2607     }
2608     else if (PyErr_Occurred())
2609         return NULL;
2610 
2611     if (PyUnicode_Check(x)) {
2612         PyErr_SetString(PyExc_TypeError,
2613                         "string argument without an encoding");
2614         return NULL;
2615     }
2616     /* Is it an integer? */
2617     if (_PyIndex_Check(x)) {
2618         size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2619         if (size == -1 && PyErr_Occurred()) {
2620             if (!PyErr_ExceptionMatches(PyExc_TypeError))
2621                 return NULL;
2622             PyErr_Clear();  /* fall through */
2623         }
2624         else {
2625             if (size < 0) {
2626                 PyErr_SetString(PyExc_ValueError, "negative count");
2627                 return NULL;
2628             }
2629             new = _PyBytes_FromSize(size, 1);
2630             if (new == NULL)
2631                 return NULL;
2632             return new;
2633         }
2634     }
2635 
2636     return PyBytes_FromObject(x);
2637 }
2638 
2639 static PyObject*
_PyBytes_FromBuffer(PyObject * x)2640 _PyBytes_FromBuffer(PyObject *x)
2641 {
2642     PyObject *new;
2643     Py_buffer view;
2644 
2645     if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2646         return NULL;
2647 
2648     new = PyBytes_FromStringAndSize(NULL, view.len);
2649     if (!new)
2650         goto fail;
2651     if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2652                 &view, view.len, 'C') < 0)
2653         goto fail;
2654     PyBuffer_Release(&view);
2655     return new;
2656 
2657 fail:
2658     Py_XDECREF(new);
2659     PyBuffer_Release(&view);
2660     return NULL;
2661 }
2662 
2663 static PyObject*
_PyBytes_FromList(PyObject * x)2664 _PyBytes_FromList(PyObject *x)
2665 {
2666     Py_ssize_t i, size = PyList_GET_SIZE(x);
2667     Py_ssize_t value;
2668     char *str;
2669     PyObject *item;
2670     _PyBytesWriter writer;
2671 
2672     _PyBytesWriter_Init(&writer);
2673     str = _PyBytesWriter_Alloc(&writer, size);
2674     if (str == NULL)
2675         return NULL;
2676     writer.overallocate = 1;
2677     size = writer.allocated;
2678 
2679     for (i = 0; i < PyList_GET_SIZE(x); i++) {
2680         item = PyList_GET_ITEM(x, i);
2681         Py_INCREF(item);
2682         value = PyNumber_AsSsize_t(item, NULL);
2683         Py_DECREF(item);
2684         if (value == -1 && PyErr_Occurred())
2685             goto error;
2686 
2687         if (value < 0 || value >= 256) {
2688             PyErr_SetString(PyExc_ValueError,
2689                             "bytes must be in range(0, 256)");
2690             goto error;
2691         }
2692 
2693         if (i >= size) {
2694             str = _PyBytesWriter_Resize(&writer, str, size+1);
2695             if (str == NULL)
2696                 return NULL;
2697             size = writer.allocated;
2698         }
2699         *str++ = (char) value;
2700     }
2701     return _PyBytesWriter_Finish(&writer, str);
2702 
2703   error:
2704     _PyBytesWriter_Dealloc(&writer);
2705     return NULL;
2706 }
2707 
2708 static PyObject*
_PyBytes_FromTuple(PyObject * x)2709 _PyBytes_FromTuple(PyObject *x)
2710 {
2711     PyObject *bytes;
2712     Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2713     Py_ssize_t value;
2714     char *str;
2715     PyObject *item;
2716 
2717     bytes = PyBytes_FromStringAndSize(NULL, size);
2718     if (bytes == NULL)
2719         return NULL;
2720     str = ((PyBytesObject *)bytes)->ob_sval;
2721 
2722     for (i = 0; i < size; i++) {
2723         item = PyTuple_GET_ITEM(x, i);
2724         value = PyNumber_AsSsize_t(item, NULL);
2725         if (value == -1 && PyErr_Occurred())
2726             goto error;
2727 
2728         if (value < 0 || value >= 256) {
2729             PyErr_SetString(PyExc_ValueError,
2730                             "bytes must be in range(0, 256)");
2731             goto error;
2732         }
2733         *str++ = (char) value;
2734     }
2735     return bytes;
2736 
2737   error:
2738     Py_DECREF(bytes);
2739     return NULL;
2740 }
2741 
2742 static PyObject *
_PyBytes_FromIterator(PyObject * it,PyObject * x)2743 _PyBytes_FromIterator(PyObject *it, PyObject *x)
2744 {
2745     char *str;
2746     Py_ssize_t i, size;
2747     _PyBytesWriter writer;
2748 
2749     /* For iterator version, create a string object and resize as needed */
2750     size = PyObject_LengthHint(x, 64);
2751     if (size == -1 && PyErr_Occurred())
2752         return NULL;
2753 
2754     _PyBytesWriter_Init(&writer);
2755     str = _PyBytesWriter_Alloc(&writer, size);
2756     if (str == NULL)
2757         return NULL;
2758     writer.overallocate = 1;
2759     size = writer.allocated;
2760 
2761     /* Run the iterator to exhaustion */
2762     for (i = 0; ; i++) {
2763         PyObject *item;
2764         Py_ssize_t value;
2765 
2766         /* Get the next item */
2767         item = PyIter_Next(it);
2768         if (item == NULL) {
2769             if (PyErr_Occurred())
2770                 goto error;
2771             break;
2772         }
2773 
2774         /* Interpret it as an int (__index__) */
2775         value = PyNumber_AsSsize_t(item, NULL);
2776         Py_DECREF(item);
2777         if (value == -1 && PyErr_Occurred())
2778             goto error;
2779 
2780         /* Range check */
2781         if (value < 0 || value >= 256) {
2782             PyErr_SetString(PyExc_ValueError,
2783                             "bytes must be in range(0, 256)");
2784             goto error;
2785         }
2786 
2787         /* Append the byte */
2788         if (i >= size) {
2789             str = _PyBytesWriter_Resize(&writer, str, size+1);
2790             if (str == NULL)
2791                 return NULL;
2792             size = writer.allocated;
2793         }
2794         *str++ = (char) value;
2795     }
2796 
2797     return _PyBytesWriter_Finish(&writer, str);
2798 
2799   error:
2800     _PyBytesWriter_Dealloc(&writer);
2801     return NULL;
2802 }
2803 
2804 PyObject *
PyBytes_FromObject(PyObject * x)2805 PyBytes_FromObject(PyObject *x)
2806 {
2807     PyObject *it, *result;
2808 
2809     if (x == NULL) {
2810         PyErr_BadInternalCall();
2811         return NULL;
2812     }
2813 
2814     if (PyBytes_CheckExact(x)) {
2815         Py_INCREF(x);
2816         return x;
2817     }
2818 
2819     /* Use the modern buffer interface */
2820     if (PyObject_CheckBuffer(x))
2821         return _PyBytes_FromBuffer(x);
2822 
2823     if (PyList_CheckExact(x))
2824         return _PyBytes_FromList(x);
2825 
2826     if (PyTuple_CheckExact(x))
2827         return _PyBytes_FromTuple(x);
2828 
2829     if (!PyUnicode_Check(x)) {
2830         it = PyObject_GetIter(x);
2831         if (it != NULL) {
2832             result = _PyBytes_FromIterator(it, x);
2833             Py_DECREF(it);
2834             return result;
2835         }
2836         if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2837             return NULL;
2838         }
2839     }
2840 
2841     PyErr_Format(PyExc_TypeError,
2842                  "cannot convert '%.200s' object to bytes",
2843                  Py_TYPE(x)->tp_name);
2844     return NULL;
2845 }
2846 
2847 static PyObject *
bytes_subtype_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2848 bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2849 {
2850     PyObject *tmp, *pnew;
2851     Py_ssize_t n;
2852 
2853     assert(PyType_IsSubtype(type, &PyBytes_Type));
2854     tmp = bytes_new(&PyBytes_Type, args, kwds);
2855     if (tmp == NULL)
2856         return NULL;
2857     assert(PyBytes_Check(tmp));
2858     n = PyBytes_GET_SIZE(tmp);
2859     pnew = type->tp_alloc(type, n);
2860     if (pnew != NULL) {
2861         memcpy(PyBytes_AS_STRING(pnew),
2862                   PyBytes_AS_STRING(tmp), n+1);
2863         ((PyBytesObject *)pnew)->ob_shash =
2864             ((PyBytesObject *)tmp)->ob_shash;
2865     }
2866     Py_DECREF(tmp);
2867     return pnew;
2868 }
2869 
2870 PyDoc_STRVAR(bytes_doc,
2871 "bytes(iterable_of_ints) -> bytes\n\
2872 bytes(string, encoding[, errors]) -> bytes\n\
2873 bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
2874 bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2875 bytes() -> empty bytes object\n\
2876 \n\
2877 Construct an immutable array of bytes from:\n\
2878   - an iterable yielding integers in range(256)\n\
2879   - a text string encoded using the specified encoding\n\
2880   - any object implementing the buffer API.\n\
2881   - an integer");
2882 
2883 static PyObject *bytes_iter(PyObject *seq);
2884 
2885 PyTypeObject PyBytes_Type = {
2886     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2887     "bytes",
2888     PyBytesObject_SIZE,
2889     sizeof(char),
2890     0,                                          /* tp_dealloc */
2891     0,                                          /* tp_vectorcall_offset */
2892     0,                                          /* tp_getattr */
2893     0,                                          /* tp_setattr */
2894     0,                                          /* tp_as_async */
2895     (reprfunc)bytes_repr,                       /* tp_repr */
2896     &bytes_as_number,                           /* tp_as_number */
2897     &bytes_as_sequence,                         /* tp_as_sequence */
2898     &bytes_as_mapping,                          /* tp_as_mapping */
2899     (hashfunc)bytes_hash,                       /* tp_hash */
2900     0,                                          /* tp_call */
2901     bytes_str,                                  /* tp_str */
2902     PyObject_GenericGetAttr,                    /* tp_getattro */
2903     0,                                          /* tp_setattro */
2904     &bytes_as_buffer,                           /* tp_as_buffer */
2905     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2906         Py_TPFLAGS_BYTES_SUBCLASS,              /* tp_flags */
2907     bytes_doc,                                  /* tp_doc */
2908     0,                                          /* tp_traverse */
2909     0,                                          /* tp_clear */
2910     (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
2911     0,                                          /* tp_weaklistoffset */
2912     bytes_iter,                                 /* tp_iter */
2913     0,                                          /* tp_iternext */
2914     bytes_methods,                              /* tp_methods */
2915     0,                                          /* tp_members */
2916     0,                                          /* tp_getset */
2917     &PyBaseObject_Type,                         /* tp_base */
2918     0,                                          /* tp_dict */
2919     0,                                          /* tp_descr_get */
2920     0,                                          /* tp_descr_set */
2921     0,                                          /* tp_dictoffset */
2922     0,                                          /* tp_init */
2923     0,                                          /* tp_alloc */
2924     bytes_new,                                  /* tp_new */
2925     PyObject_Del,                               /* tp_free */
2926 };
2927 
2928 void
PyBytes_Concat(PyObject ** pv,PyObject * w)2929 PyBytes_Concat(PyObject **pv, PyObject *w)
2930 {
2931     assert(pv != NULL);
2932     if (*pv == NULL)
2933         return;
2934     if (w == NULL) {
2935         Py_CLEAR(*pv);
2936         return;
2937     }
2938 
2939     if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2940         /* Only one reference, so we can resize in place */
2941         Py_ssize_t oldsize;
2942         Py_buffer wb;
2943 
2944         if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
2945             PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2946                          Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2947             Py_CLEAR(*pv);
2948             return;
2949         }
2950 
2951         oldsize = PyBytes_GET_SIZE(*pv);
2952         if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2953             PyErr_NoMemory();
2954             goto error;
2955         }
2956         if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2957             goto error;
2958 
2959         memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2960         PyBuffer_Release(&wb);
2961         return;
2962 
2963       error:
2964         PyBuffer_Release(&wb);
2965         Py_CLEAR(*pv);
2966         return;
2967     }
2968 
2969     else {
2970         /* Multiple references, need to create new object */
2971         PyObject *v;
2972         v = bytes_concat(*pv, w);
2973         Py_SETREF(*pv, v);
2974     }
2975 }
2976 
2977 void
PyBytes_ConcatAndDel(PyObject ** pv,PyObject * w)2978 PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
2979 {
2980     PyBytes_Concat(pv, w);
2981     Py_XDECREF(w);
2982 }
2983 
2984 
2985 /* The following function breaks the notion that bytes are immutable:
2986    it changes the size of a bytes object.  We get away with this only if there
2987    is only one module referencing the object.  You can also think of it
2988    as creating a new bytes object and destroying the old one, only
2989    more efficiently.  In any case, don't use this if the bytes object may
2990    already be known to some other part of the code...
2991    Note that if there's not enough memory to resize the bytes object, the
2992    original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
2993    memory" exception is set, and -1 is returned.  Else (on success) 0 is
2994    returned, and the value in *pv may or may not be the same as on input.
2995    As always, an extra byte is allocated for a trailing \0 byte (newsize
2996    does *not* include that), and a trailing \0 byte is stored.
2997 */
2998 
2999 int
_PyBytes_Resize(PyObject ** pv,Py_ssize_t newsize)3000 _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3001 {
3002     PyObject *v;
3003     PyBytesObject *sv;
3004     v = *pv;
3005     if (!PyBytes_Check(v) || newsize < 0) {
3006         goto error;
3007     }
3008     if (Py_SIZE(v) == newsize) {
3009         /* return early if newsize equals to v->ob_size */
3010         return 0;
3011     }
3012     if (Py_SIZE(v) == 0) {
3013         if (newsize == 0) {
3014             return 0;
3015         }
3016         *pv = _PyBytes_FromSize(newsize, 0);
3017         Py_DECREF(v);
3018         return (*pv == NULL) ? -1 : 0;
3019     }
3020     if (Py_REFCNT(v) != 1) {
3021         goto error;
3022     }
3023     if (newsize == 0) {
3024         *pv = _PyBytes_FromSize(0, 0);
3025         Py_DECREF(v);
3026         return (*pv == NULL) ? -1 : 0;
3027     }
3028     /* XXX UNREF/NEWREF interface should be more symmetrical */
3029 #ifdef Py_REF_DEBUG
3030     _Py_RefTotal--;
3031 #endif
3032 #ifdef Py_TRACE_REFS
3033     _Py_ForgetReference(v);
3034 #endif
3035     *pv = (PyObject *)
3036         PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
3037     if (*pv == NULL) {
3038         PyObject_Del(v);
3039         PyErr_NoMemory();
3040         return -1;
3041     }
3042     _Py_NewReference(*pv);
3043     sv = (PyBytesObject *) *pv;
3044     Py_SET_SIZE(sv, newsize);
3045     sv->ob_sval[newsize] = '\0';
3046     sv->ob_shash = -1;          /* invalidate cached hash value */
3047     return 0;
3048 error:
3049     *pv = 0;
3050     Py_DECREF(v);
3051     PyErr_BadInternalCall();
3052     return -1;
3053 }
3054 
3055 void
_PyBytes_Fini(void)3056 _PyBytes_Fini(void)
3057 {
3058     int i;
3059     for (i = 0; i < UCHAR_MAX + 1; i++)
3060         Py_CLEAR(characters[i]);
3061     Py_CLEAR(nullstring);
3062 }
3063 
3064 /*********************** Bytes Iterator ****************************/
3065 
/* State of an iterator over a bytes object. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;   /* Index into it_seq of the next byte to yield */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
3071 
3072 static void
striter_dealloc(striterobject * it)3073 striter_dealloc(striterobject *it)
3074 {
3075     _PyObject_GC_UNTRACK(it);
3076     Py_XDECREF(it->it_seq);
3077     PyObject_GC_Del(it);
3078 }
3079 
/* GC traversal for the bytes iterator: the only object it references is
   the bytes object being iterated over. */
static int
striter_traverse(striterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
3086 
3087 static PyObject *
striter_next(striterobject * it)3088 striter_next(striterobject *it)
3089 {
3090     PyBytesObject *seq;
3091     PyObject *item;
3092 
3093     assert(it != NULL);
3094     seq = it->it_seq;
3095     if (seq == NULL)
3096         return NULL;
3097     assert(PyBytes_Check(seq));
3098 
3099     if (it->it_index < PyBytes_GET_SIZE(seq)) {
3100         item = PyLong_FromLong(
3101             (unsigned char)seq->ob_sval[it->it_index]);
3102         if (item != NULL)
3103             ++it->it_index;
3104         return item;
3105     }
3106 
3107     it->it_seq = NULL;
3108     Py_DECREF(seq);
3109     return NULL;
3110 }
3111 
3112 static PyObject *
striter_len(striterobject * it,PyObject * Py_UNUSED (ignored))3113 striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
3114 {
3115     Py_ssize_t len = 0;
3116     if (it->it_seq)
3117         len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3118     return PyLong_FromSsize_t(len);
3119 }
3120 
3121 PyDoc_STRVAR(length_hint_doc,
3122              "Private method returning an estimate of len(list(it)).");
3123 
3124 static PyObject *
striter_reduce(striterobject * it,PyObject * Py_UNUSED (ignored))3125 striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
3126 {
3127     _Py_IDENTIFIER(iter);
3128     if (it->it_seq != NULL) {
3129         return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
3130                              it->it_seq, it->it_index);
3131     } else {
3132         return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
3133     }
3134 }
3135 
3136 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3137 
3138 static PyObject *
striter_setstate(striterobject * it,PyObject * state)3139 striter_setstate(striterobject *it, PyObject *state)
3140 {
3141     Py_ssize_t index = PyLong_AsSsize_t(state);
3142     if (index == -1 && PyErr_Occurred())
3143         return NULL;
3144     if (it->it_seq != NULL) {
3145         if (index < 0)
3146             index = 0;
3147         else if (index > PyBytes_GET_SIZE(it->it_seq))
3148             index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3149         it->it_index = index;
3150     }
3151     Py_RETURN_NONE;
3152 }
3153 
3154 PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3155 
/* Method table of the bytes iterator: length hint plus pickling support. */
static PyMethodDef striter_methods[] = {
    {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
     length_hint_doc},
    {"__reduce__",      (PyCFunction)striter_reduce, METH_NOARGS,
     reduce_doc},
    {"__setstate__",    (PyCFunction)striter_setstate, METH_O,
     setstate_doc},
    {NULL,              NULL}           /* sentinel */
};
3165 
3166 PyTypeObject PyBytesIter_Type = {
3167     PyVarObject_HEAD_INIT(&PyType_Type, 0)
3168     "bytes_iterator",                           /* tp_name */
3169     sizeof(striterobject),                      /* tp_basicsize */
3170     0,                                          /* tp_itemsize */
3171     /* methods */
3172     (destructor)striter_dealloc,                /* tp_dealloc */
3173     0,                                          /* tp_vectorcall_offset */
3174     0,                                          /* tp_getattr */
3175     0,                                          /* tp_setattr */
3176     0,                                          /* tp_as_async */
3177     0,                                          /* tp_repr */
3178     0,                                          /* tp_as_number */
3179     0,                                          /* tp_as_sequence */
3180     0,                                          /* tp_as_mapping */
3181     0,                                          /* tp_hash */
3182     0,                                          /* tp_call */
3183     0,                                          /* tp_str */
3184     PyObject_GenericGetAttr,                    /* tp_getattro */
3185     0,                                          /* tp_setattro */
3186     0,                                          /* tp_as_buffer */
3187     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3188     0,                                          /* tp_doc */
3189     (traverseproc)striter_traverse,     /* tp_traverse */
3190     0,                                          /* tp_clear */
3191     0,                                          /* tp_richcompare */
3192     0,                                          /* tp_weaklistoffset */
3193     PyObject_SelfIter,                          /* tp_iter */
3194     (iternextfunc)striter_next,                 /* tp_iternext */
3195     striter_methods,                            /* tp_methods */
3196     0,
3197 };
3198 
3199 static PyObject *
bytes_iter(PyObject * seq)3200 bytes_iter(PyObject *seq)
3201 {
3202     striterobject *it;
3203 
3204     if (!PyBytes_Check(seq)) {
3205         PyErr_BadInternalCall();
3206         return NULL;
3207     }
3208     it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3209     if (it == NULL)
3210         return NULL;
3211     it->it_index = 0;
3212     Py_INCREF(seq);
3213     it->it_seq = (PyBytesObject *)seq;
3214     _PyObject_GC_TRACK(it);
3215     return (PyObject *)it;
3216 }
3217 
3218 
3219 /* _PyBytesWriter API */
3220 
/* OVERALLOCATE_FACTOR is a divisor: when overallocation is enabled, a
   request for `size` bytes is rounded up to size + size / FACTOR. */
#ifdef MS_WINDOWS
   /* On Windows, overallocate by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocate by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3228 
3229 void
_PyBytesWriter_Init(_PyBytesWriter * writer)3230 _PyBytesWriter_Init(_PyBytesWriter *writer)
3231 {
3232     /* Set all attributes before small_buffer to 0 */
3233     memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
3234 #ifndef NDEBUG
3235     memset(writer->small_buffer, PYMEM_CLEANBYTE,
3236            sizeof(writer->small_buffer));
3237 #endif
3238 }
3239 
/* Release the heap buffer owned by the writer, if any, and reset
   writer->buffer to NULL.  The other writer fields are left unchanged. */
void
_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
{
    Py_CLEAR(writer->buffer);
}
3245 
3246 Py_LOCAL_INLINE(char*)
_PyBytesWriter_AsString(_PyBytesWriter * writer)3247 _PyBytesWriter_AsString(_PyBytesWriter *writer)
3248 {
3249     if (writer->use_small_buffer) {
3250         assert(writer->buffer == NULL);
3251         return writer->small_buffer;
3252     }
3253     else if (writer->use_bytearray) {
3254         assert(writer->buffer != NULL);
3255         return PyByteArray_AS_STRING(writer->buffer);
3256     }
3257     else {
3258         assert(writer->buffer != NULL);
3259         return PyBytes_AS_STRING(writer->buffer);
3260     }
3261 }
3262 
3263 Py_LOCAL_INLINE(Py_ssize_t)
_PyBytesWriter_GetSize(_PyBytesWriter * writer,char * str)3264 _PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
3265 {
3266     const char *start = _PyBytesWriter_AsString(writer);
3267     assert(str != NULL);
3268     assert(str >= start);
3269     assert(str - start <= writer->allocated);
3270     return str - start;
3271 }
3272 
3273 #ifndef NDEBUG
3274 Py_LOCAL_INLINE(int)
_PyBytesWriter_CheckConsistency(_PyBytesWriter * writer,char * str)3275 _PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3276 {
3277     const char *start, *end;
3278 
3279     if (writer->use_small_buffer) {
3280         assert(writer->buffer == NULL);
3281     }
3282     else {
3283         assert(writer->buffer != NULL);
3284         if (writer->use_bytearray)
3285             assert(PyByteArray_CheckExact(writer->buffer));
3286         else
3287             assert(PyBytes_CheckExact(writer->buffer));
3288         assert(Py_REFCNT(writer->buffer) == 1);
3289     }
3290 
3291     if (writer->use_bytearray) {
3292         /* bytearray has its own overallocation algorithm,
3293            writer overallocation must be disabled */
3294         assert(!writer->overallocate);
3295     }
3296 
3297     assert(0 <= writer->allocated);
3298     assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
3299     /* the last byte must always be null */
3300     start = _PyBytesWriter_AsString(writer);
3301     assert(start[writer->allocated] == 0);
3302 
3303     end = start + writer->allocated;
3304     assert(str != NULL);
3305     assert(start <= str && str <= end);
3306     return 1;
3307 }
3308 #endif
3309 
3310 void*
_PyBytesWriter_Resize(_PyBytesWriter * writer,void * str,Py_ssize_t size)3311 _PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3312 {
3313     Py_ssize_t allocated, pos;
3314 
3315     assert(_PyBytesWriter_CheckConsistency(writer, str));
3316     assert(writer->allocated < size);
3317 
3318     allocated = size;
3319     if (writer->overallocate
3320         && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3321         /* overallocate to limit the number of realloc() */
3322         allocated += allocated / OVERALLOCATE_FACTOR;
3323     }
3324 
3325     pos = _PyBytesWriter_GetSize(writer, str);
3326     if (!writer->use_small_buffer) {
3327         if (writer->use_bytearray) {
3328             if (PyByteArray_Resize(writer->buffer, allocated))
3329                 goto error;
3330             /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3331                but we cannot use ob_alloc because bytes may need to be moved
3332                to use the whole buffer. bytearray uses an internal optimization
3333                to avoid moving or copying bytes when bytes are removed at the
3334                beginning (ex: del bytearray[:1]). */
3335         }
3336         else {
3337             if (_PyBytes_Resize(&writer->buffer, allocated))
3338                 goto error;
3339         }
3340     }
3341     else {
3342         /* convert from stack buffer to bytes object buffer */
3343         assert(writer->buffer == NULL);
3344 
3345         if (writer->use_bytearray)
3346             writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3347         else
3348             writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3349         if (writer->buffer == NULL)
3350             goto error;
3351 
3352         if (pos != 0) {
3353             char *dest;
3354             if (writer->use_bytearray)
3355                 dest = PyByteArray_AS_STRING(writer->buffer);
3356             else
3357                 dest = PyBytes_AS_STRING(writer->buffer);
3358             memcpy(dest,
3359                       writer->small_buffer,
3360                       pos);
3361         }
3362 
3363         writer->use_small_buffer = 0;
3364 #ifndef NDEBUG
3365         memset(writer->small_buffer, PYMEM_CLEANBYTE,
3366                sizeof(writer->small_buffer));
3367 #endif
3368     }
3369     writer->allocated = allocated;
3370 
3371     str = _PyBytesWriter_AsString(writer) + pos;
3372     assert(_PyBytesWriter_CheckConsistency(writer, str));
3373     return str;
3374 
3375 error:
3376     _PyBytesWriter_Dealloc(writer);
3377     return NULL;
3378 }
3379 
3380 void*
_PyBytesWriter_Prepare(_PyBytesWriter * writer,void * str,Py_ssize_t size)3381 _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3382 {
3383     Py_ssize_t new_min_size;
3384 
3385     assert(_PyBytesWriter_CheckConsistency(writer, str));
3386     assert(size >= 0);
3387 
3388     if (size == 0) {
3389         /* nothing to do */
3390         return str;
3391     }
3392 
3393     if (writer->min_size > PY_SSIZE_T_MAX - size) {
3394         PyErr_NoMemory();
3395         _PyBytesWriter_Dealloc(writer);
3396         return NULL;
3397     }
3398     new_min_size = writer->min_size + size;
3399 
3400     if (new_min_size > writer->allocated)
3401         str = _PyBytesWriter_Resize(writer, str, new_min_size);
3402 
3403     writer->min_size = new_min_size;
3404     return str;
3405 }
3406 
3407 /* Allocate the buffer to write size bytes.
3408    Return the pointer to the beginning of buffer data.
3409    Raise an exception and return NULL on error. */
3410 void*
_PyBytesWriter_Alloc(_PyBytesWriter * writer,Py_ssize_t size)3411 _PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3412 {
3413     /* ensure that _PyBytesWriter_Alloc() is only called once */
3414     assert(writer->min_size == 0 && writer->buffer == NULL);
3415     assert(size >= 0);
3416 
3417     writer->use_small_buffer = 1;
3418 #ifndef NDEBUG
3419     writer->allocated = sizeof(writer->small_buffer) - 1;
3420     /* In debug mode, don't use the full small buffer because it is less
3421        efficient than bytes and bytearray objects to detect buffer underflow
3422        and buffer overflow. Use 10 bytes of the small buffer to test also
3423        code using the smaller buffer in debug mode.
3424 
3425        Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3426        in debug mode to also be able to detect stack overflow when running
3427        tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3428        if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3429        stack overflow. */
3430     writer->allocated = Py_MIN(writer->allocated, 10);
3431     /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3432        to detect buffer overflow */
3433     writer->small_buffer[writer->allocated] = 0;
3434 #else
3435     writer->allocated = sizeof(writer->small_buffer);
3436 #endif
3437     return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
3438 }
3439 
3440 PyObject *
_PyBytesWriter_Finish(_PyBytesWriter * writer,void * str)3441 _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
3442 {
3443     Py_ssize_t size;
3444     PyObject *result;
3445 
3446     assert(_PyBytesWriter_CheckConsistency(writer, str));
3447 
3448     size = _PyBytesWriter_GetSize(writer, str);
3449     if (size == 0 && !writer->use_bytearray) {
3450         Py_CLEAR(writer->buffer);
3451         /* Get the empty byte string singleton */
3452         result = PyBytes_FromStringAndSize(NULL, 0);
3453     }
3454     else if (writer->use_small_buffer) {
3455         if (writer->use_bytearray) {
3456             result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3457         }
3458         else {
3459             result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3460         }
3461     }
3462     else {
3463         result = writer->buffer;
3464         writer->buffer = NULL;
3465 
3466         if (size != writer->allocated) {
3467             if (writer->use_bytearray) {
3468                 if (PyByteArray_Resize(result, size)) {
3469                     Py_DECREF(result);
3470                     return NULL;
3471                 }
3472             }
3473             else {
3474                 if (_PyBytes_Resize(&result, size)) {
3475                     assert(result == NULL);
3476                     return NULL;
3477                 }
3478             }
3479         }
3480     }
3481     return result;
3482 }
3483 
3484 void*
_PyBytesWriter_WriteBytes(_PyBytesWriter * writer,void * ptr,const void * bytes,Py_ssize_t size)3485 _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
3486                           const void *bytes, Py_ssize_t size)
3487 {
3488     char *str = (char *)ptr;
3489 
3490     str = _PyBytesWriter_Prepare(writer, str, size);
3491     if (str == NULL)
3492         return NULL;
3493 
3494     memcpy(str, bytes, size);
3495     str += size;
3496 
3497     return str;
3498 }
3499