1 /* bytes object implementation */
2
3 #define PY_SSIZE_T_CLEAN
4
5 #include "Python.h"
6 #include "pycore_abstract.h" // _PyIndex_Check()
7 #include "pycore_bytesobject.h" // _PyBytes_Find(), _PyBytes_Repeat()
8 #include "pycore_bytes_methods.h" // _Py_bytes_startswith()
9 #include "pycore_call.h" // _PyObject_CallNoArgs()
10 #include "pycore_format.h" // F_LJUST
11 #include "pycore_global_objects.h" // _Py_GET_GLOBAL_OBJECT()
12 #include "pycore_initconfig.h" // _PyStatus_OK()
13 #include "pycore_long.h" // _PyLong_DigitValue
14 #include "pycore_object.h" // _PyObject_GC_TRACK
15 #include "pycore_pymem.h" // PYMEM_CLEANBYTE
16 #include "pycore_strhex.h" // _Py_strhex_with_sep()
17
18 #include <stddef.h>
19
20 /*[clinic input]
21 class bytes "PyBytesObject *" "&PyBytes_Type"
22 [clinic start generated code]*/
23 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
24
25 #include "clinic/bytesobject.c.h"
26
27 /* PyBytesObject_SIZE gives the basic size of a bytes object; any memory allocation
28 for a bytes object of length n should request PyBytesObject_SIZE + n bytes.
29
30 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
31 3 or 7 bytes per bytes object allocation on a typical system.
32 */
33 #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
34
/* Forward declaration: _PyBytesWriter_GetSize() is declared here ahead of
   its definition, which is not visible in this section of the file.
   It takes the writer and a char pointer and yields a Py_ssize_t. */
Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
                                                   char *str);
38
39
/* Shorthands for the statically allocated bytes singletons.

   CHARACTERS     - the table of single-byte singletons.
   CHARACTER(ch)  - pointer to the table entry at index `ch`.  The expansion
                    is a plain parenthesised expression (no trailing
                    semicolon), so it can be used inside larger expressions
                    as well as on the right-hand side of an assignment.
   EMPTY          - address of the empty-bytes singleton. */
#define CHARACTERS _Py_SINGLETON(bytes_characters)
#define CHARACTER(ch) \
    ((PyBytesObject *)&(CHARACTERS[(ch)]))
#define EMPTY (&_Py_SINGLETON(bytes_empty))
44
45
46 // Return a borrowed reference to the empty bytes string singleton.
bytes_get_empty(void)47 static inline PyObject* bytes_get_empty(void)
48 {
49 return &EMPTY->ob_base.ob_base;
50 }
51
52
53 // Return a strong reference to the empty bytes string singleton.
bytes_new_empty(void)54 static inline PyObject* bytes_new_empty(void)
55 {
56 Py_INCREF(EMPTY);
57 return (PyObject *)EMPTY;
58 }
59
60
61 /*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string; its length, as measured by strlen(), becomes the size of the
   new object.
64
65 For PyBytes_FromStringAndSize(), the parameter `str' is
66 either NULL or else points to a string containing at least `size' bytes.
67 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
68 not have to be null-terminated. (Therefore it is safe to construct a
69 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
70 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
71 bytes (setting the last byte to the null terminating character) and you can
72 fill in the data yourself. If `str' is non-NULL then the resulting
73 PyBytes object must be treated as immutable and you must not fill in nor
74 alter the data yourself, since the strings may be shared.
75
76 The PyObject member `op->ob_size', which denotes the number of "extra
77 items" in a variable-size object, will contain the number of bytes
78 allocated for string data, not counting the null terminating character.
79 It is therefore equal to the `size' parameter (for
80 PyBytes_FromStringAndSize()) or the length of the string in the `str'
81 parameter (for PyBytes_FromString()).
82 */
83 static PyObject *
_PyBytes_FromSize(Py_ssize_t size,int use_calloc)84 _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
85 {
86 PyBytesObject *op;
87 assert(size >= 0);
88
89 if (size == 0) {
90 return bytes_new_empty();
91 }
92
93 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
94 PyErr_SetString(PyExc_OverflowError,
95 "byte string is too large");
96 return NULL;
97 }
98
99 /* Inline PyObject_NewVar */
100 if (use_calloc)
101 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
102 else
103 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
104 if (op == NULL) {
105 return PyErr_NoMemory();
106 }
107 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
108 _Py_COMP_DIAG_PUSH
109 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
110 op->ob_shash = -1;
111 _Py_COMP_DIAG_POP
112 if (!use_calloc) {
113 op->ob_sval[size] = '\0';
114 }
115 return (PyObject *) op;
116 }
117
118 PyObject *
PyBytes_FromStringAndSize(const char * str,Py_ssize_t size)119 PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
120 {
121 PyBytesObject *op;
122 if (size < 0) {
123 PyErr_SetString(PyExc_SystemError,
124 "Negative size passed to PyBytes_FromStringAndSize");
125 return NULL;
126 }
127 if (size == 1 && str != NULL) {
128 op = CHARACTER(*str & 255);
129 Py_INCREF(op);
130 return (PyObject *)op;
131 }
132 if (size == 0) {
133 return bytes_new_empty();
134 }
135
136 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
137 if (op == NULL)
138 return NULL;
139 if (str == NULL)
140 return (PyObject *) op;
141
142 memcpy(op->ob_sval, str, size);
143 return (PyObject *) op;
144 }
145
146 PyObject *
PyBytes_FromString(const char * str)147 PyBytes_FromString(const char *str)
148 {
149 size_t size;
150 PyBytesObject *op;
151
152 assert(str != NULL);
153 size = strlen(str);
154 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
155 PyErr_SetString(PyExc_OverflowError,
156 "byte string is too long");
157 return NULL;
158 }
159
160 if (size == 0) {
161 return bytes_new_empty();
162 }
163 else if (size == 1) {
164 op = CHARACTER(*str & 255);
165 Py_INCREF(op);
166 return (PyObject *)op;
167 }
168
169 /* Inline PyObject_NewVar */
170 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
171 if (op == NULL) {
172 return PyErr_NoMemory();
173 }
174 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
175 _Py_COMP_DIAG_PUSH
176 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
177 op->ob_shash = -1;
178 _Py_COMP_DIAG_POP
179 memcpy(op->ob_sval, str, size+1);
180 return (PyObject *) op;
181 }
182
183 PyObject *
PyBytes_FromFormatV(const char * format,va_list vargs)184 PyBytes_FromFormatV(const char *format, va_list vargs)
185 {
186 char *s;
187 const char *f;
188 const char *p;
189 Py_ssize_t prec;
190 int longflag;
191 int size_tflag;
192 /* Longest 64-bit formatted numbers:
193 - "18446744073709551615\0" (21 bytes)
194 - "-9223372036854775808\0" (21 bytes)
195 Decimal takes the most space (it isn't enough for octal.)
196
197 Longest 64-bit pointer representation:
198 "0xffffffffffffffff\0" (19 bytes). */
199 char buffer[21];
200 _PyBytesWriter writer;
201
202 _PyBytesWriter_Init(&writer);
203
204 s = _PyBytesWriter_Alloc(&writer, strlen(format));
205 if (s == NULL)
206 return NULL;
207 writer.overallocate = 1;
208
209 #define WRITE_BYTES(str) \
210 do { \
211 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
212 if (s == NULL) \
213 goto error; \
214 } while (0)
215
216 for (f = format; *f; f++) {
217 if (*f != '%') {
218 *s++ = *f;
219 continue;
220 }
221
222 p = f++;
223
224 /* ignore the width (ex: 10 in "%10s") */
225 while (Py_ISDIGIT(*f))
226 f++;
227
228 /* parse the precision (ex: 10 in "%.10s") */
229 prec = 0;
230 if (*f == '.') {
231 f++;
232 for (; Py_ISDIGIT(*f); f++) {
233 prec = (prec * 10) + (*f - '0');
234 }
235 }
236
237 while (*f && *f != '%' && !Py_ISALPHA(*f))
238 f++;
239
240 /* handle the long flag ('l'), but only for %ld and %lu.
241 others can be added when necessary. */
242 longflag = 0;
243 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
244 longflag = 1;
245 ++f;
246 }
247
248 /* handle the size_t flag ('z'). */
249 size_tflag = 0;
250 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
251 size_tflag = 1;
252 ++f;
253 }
254
255 /* subtract bytes preallocated for the format string
256 (ex: 2 for "%s") */
257 writer.min_size -= (f - p + 1);
258
259 switch (*f) {
260 case 'c':
261 {
262 int c = va_arg(vargs, int);
263 if (c < 0 || c > 255) {
264 PyErr_SetString(PyExc_OverflowError,
265 "PyBytes_FromFormatV(): %c format "
266 "expects an integer in range [0; 255]");
267 goto error;
268 }
269 writer.min_size++;
270 *s++ = (unsigned char)c;
271 break;
272 }
273
274 case 'd':
275 if (longflag) {
276 sprintf(buffer, "%ld", va_arg(vargs, long));
277 }
278 else if (size_tflag) {
279 sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
280 }
281 else {
282 sprintf(buffer, "%d", va_arg(vargs, int));
283 }
284 assert(strlen(buffer) < sizeof(buffer));
285 WRITE_BYTES(buffer);
286 break;
287
288 case 'u':
289 if (longflag) {
290 sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
291 }
292 else if (size_tflag) {
293 sprintf(buffer, "%zu", va_arg(vargs, size_t));
294 }
295 else {
296 sprintf(buffer, "%u", va_arg(vargs, unsigned int));
297 }
298 assert(strlen(buffer) < sizeof(buffer));
299 WRITE_BYTES(buffer);
300 break;
301
302 case 'i':
303 sprintf(buffer, "%i", va_arg(vargs, int));
304 assert(strlen(buffer) < sizeof(buffer));
305 WRITE_BYTES(buffer);
306 break;
307
308 case 'x':
309 sprintf(buffer, "%x", va_arg(vargs, int));
310 assert(strlen(buffer) < sizeof(buffer));
311 WRITE_BYTES(buffer);
312 break;
313
314 case 's':
315 {
316 Py_ssize_t i;
317
318 p = va_arg(vargs, const char*);
319 if (prec <= 0) {
320 i = strlen(p);
321 }
322 else {
323 i = 0;
324 while (i < prec && p[i]) {
325 i++;
326 }
327 }
328 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
329 if (s == NULL)
330 goto error;
331 break;
332 }
333
334 case 'p':
335 sprintf(buffer, "%p", va_arg(vargs, void*));
336 assert(strlen(buffer) < sizeof(buffer));
337 /* %p is ill-defined: ensure leading 0x. */
338 if (buffer[1] == 'X')
339 buffer[1] = 'x';
340 else if (buffer[1] != 'x') {
341 memmove(buffer+2, buffer, strlen(buffer)+1);
342 buffer[0] = '0';
343 buffer[1] = 'x';
344 }
345 WRITE_BYTES(buffer);
346 break;
347
348 case '%':
349 writer.min_size++;
350 *s++ = '%';
351 break;
352
353 default:
354 if (*f == 0) {
355 /* fix min_size if we reached the end of the format string */
356 writer.min_size++;
357 }
358
359 /* invalid format string: copy unformatted string and exit */
360 WRITE_BYTES(p);
361 return _PyBytesWriter_Finish(&writer, s);
362 }
363 }
364
365 #undef WRITE_BYTES
366
367 return _PyBytesWriter_Finish(&writer, s);
368
369 error:
370 _PyBytesWriter_Dealloc(&writer);
371 return NULL;
372 }
373
374 PyObject *
PyBytes_FromFormat(const char * format,...)375 PyBytes_FromFormat(const char *format, ...)
376 {
377 PyObject* ret;
378 va_list vargs;
379
380 #ifdef HAVE_STDARG_PROTOTYPES
381 va_start(vargs, format);
382 #else
383 va_start(vargs);
384 #endif
385 ret = PyBytes_FromFormatV(format, vargs);
386 va_end(vargs);
387 return ret;
388 }
389
390 /* Helpers for formatstring */
391
392 Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject * args,Py_ssize_t arglen,Py_ssize_t * p_argidx)393 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
394 {
395 Py_ssize_t argidx = *p_argidx;
396 if (argidx < arglen) {
397 (*p_argidx)++;
398 if (arglen < 0)
399 return args;
400 else
401 return PyTuple_GetItem(args, argidx);
402 }
403 PyErr_SetString(PyExc_TypeError,
404 "not enough arguments for format string");
405 return NULL;
406 }
407
408 /* Returns a new reference to a PyBytes object, or NULL on failure. */
409
410 static char*
formatfloat(PyObject * v,int flags,int prec,int type,PyObject ** p_result,_PyBytesWriter * writer,char * str)411 formatfloat(PyObject *v, int flags, int prec, int type,
412 PyObject **p_result, _PyBytesWriter *writer, char *str)
413 {
414 char *p;
415 PyObject *result;
416 double x;
417 size_t len;
418 int dtoa_flags = 0;
419
420 x = PyFloat_AsDouble(v);
421 if (x == -1.0 && PyErr_Occurred()) {
422 PyErr_Format(PyExc_TypeError, "float argument required, "
423 "not %.200s", Py_TYPE(v)->tp_name);
424 return NULL;
425 }
426
427 if (prec < 0)
428 prec = 6;
429
430 if (flags & F_ALT) {
431 dtoa_flags |= Py_DTSF_ALT;
432 }
433 p = PyOS_double_to_string(x, type, prec, dtoa_flags, NULL);
434
435 if (p == NULL)
436 return NULL;
437
438 len = strlen(p);
439 if (writer != NULL) {
440 str = _PyBytesWriter_Prepare(writer, str, len);
441 if (str == NULL) {
442 PyMem_Free(p);
443 return NULL;
444 }
445 memcpy(str, p, len);
446 PyMem_Free(p);
447 str += len;
448 return str;
449 }
450
451 result = PyBytes_FromStringAndSize(p, len);
452 PyMem_Free(p);
453 *p_result = result;
454 return result != NULL ? str : NULL;
455 }
456
457 static PyObject *
formatlong(PyObject * v,int flags,int prec,int type)458 formatlong(PyObject *v, int flags, int prec, int type)
459 {
460 PyObject *result, *iobj;
461 if (type == 'i')
462 type = 'd';
463 if (PyLong_Check(v))
464 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
465 if (PyNumber_Check(v)) {
466 /* make sure number is a type of integer for o, x, and X */
467 if (type == 'o' || type == 'x' || type == 'X')
468 iobj = _PyNumber_Index(v);
469 else
470 iobj = PyNumber_Long(v);
471 if (iobj != NULL) {
472 assert(PyLong_Check(iobj));
473 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
474 Py_DECREF(iobj);
475 return result;
476 }
477 if (!PyErr_ExceptionMatches(PyExc_TypeError))
478 return NULL;
479 }
480 PyErr_Format(PyExc_TypeError,
481 "%%%c format: %s is required, not %.200s", type,
482 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
483 : "a real number",
484 Py_TYPE(v)->tp_name);
485 return NULL;
486 }
487
488 static int
byte_converter(PyObject * arg,char * p)489 byte_converter(PyObject *arg, char *p)
490 {
491 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
492 *p = PyBytes_AS_STRING(arg)[0];
493 return 1;
494 }
495 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
496 *p = PyByteArray_AS_STRING(arg)[0];
497 return 1;
498 }
499 else {
500 int overflow;
501 long ival = PyLong_AsLongAndOverflow(arg, &overflow);
502 if (ival == -1 && PyErr_Occurred()) {
503 if (PyErr_ExceptionMatches(PyExc_TypeError)) {
504 goto onError;
505 }
506 return 0;
507 }
508 if (!(0 <= ival && ival <= 255)) {
509 /* this includes an overflow in converting to C long */
510 PyErr_SetString(PyExc_OverflowError,
511 "%c arg not in range(256)");
512 return 0;
513 }
514 *p = (char)ival;
515 return 1;
516 }
517 onError:
518 PyErr_SetString(PyExc_TypeError,
519 "%c requires an integer in range(256) or a single byte");
520 return 0;
521 }
522
523 static PyObject *_PyBytes_FromBuffer(PyObject *x);
524
525 static PyObject *
format_obj(PyObject * v,const char ** pbuf,Py_ssize_t * plen)526 format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
527 {
528 PyObject *func, *result;
529 /* is it a bytes object? */
530 if (PyBytes_Check(v)) {
531 *pbuf = PyBytes_AS_STRING(v);
532 *plen = PyBytes_GET_SIZE(v);
533 Py_INCREF(v);
534 return v;
535 }
536 if (PyByteArray_Check(v)) {
537 *pbuf = PyByteArray_AS_STRING(v);
538 *plen = PyByteArray_GET_SIZE(v);
539 Py_INCREF(v);
540 return v;
541 }
542 /* does it support __bytes__? */
543 func = _PyObject_LookupSpecial(v, &_Py_ID(__bytes__));
544 if (func != NULL) {
545 result = _PyObject_CallNoArgs(func);
546 Py_DECREF(func);
547 if (result == NULL)
548 return NULL;
549 if (!PyBytes_Check(result)) {
550 PyErr_Format(PyExc_TypeError,
551 "__bytes__ returned non-bytes (type %.200s)",
552 Py_TYPE(result)->tp_name);
553 Py_DECREF(result);
554 return NULL;
555 }
556 *pbuf = PyBytes_AS_STRING(result);
557 *plen = PyBytes_GET_SIZE(result);
558 return result;
559 }
560 /* does it support buffer protocol? */
561 if (PyObject_CheckBuffer(v)) {
562 /* maybe we can avoid making a copy of the buffer object here? */
563 result = _PyBytes_FromBuffer(v);
564 if (result == NULL)
565 return NULL;
566 *pbuf = PyBytes_AS_STRING(result);
567 *plen = PyBytes_GET_SIZE(result);
568 return result;
569 }
570 PyErr_Format(PyExc_TypeError,
571 "%%b requires a bytes-like object, "
572 "or an object that implements __bytes__, not '%.100s'",
573 Py_TYPE(v)->tp_name);
574 return NULL;
575 }
576
577 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
578
579 PyObject *
_PyBytes_FormatEx(const char * format,Py_ssize_t format_len,PyObject * args,int use_bytearray)580 _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
581 PyObject *args, int use_bytearray)
582 {
583 const char *fmt;
584 char *res;
585 Py_ssize_t arglen, argidx;
586 Py_ssize_t fmtcnt;
587 int args_owned = 0;
588 PyObject *dict = NULL;
589 _PyBytesWriter writer;
590
591 if (args == NULL) {
592 PyErr_BadInternalCall();
593 return NULL;
594 }
595 fmt = format;
596 fmtcnt = format_len;
597
598 _PyBytesWriter_Init(&writer);
599 writer.use_bytearray = use_bytearray;
600
601 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
602 if (res == NULL)
603 return NULL;
604 if (!use_bytearray)
605 writer.overallocate = 1;
606
607 if (PyTuple_Check(args)) {
608 arglen = PyTuple_GET_SIZE(args);
609 argidx = 0;
610 }
611 else {
612 arglen = -1;
613 argidx = -2;
614 }
615 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
616 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
617 !PyByteArray_Check(args)) {
618 dict = args;
619 }
620
621 while (--fmtcnt >= 0) {
622 if (*fmt != '%') {
623 Py_ssize_t len;
624 char *pos;
625
626 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
627 if (pos != NULL)
628 len = pos - fmt;
629 else
630 len = fmtcnt + 1;
631 assert(len != 0);
632
633 memcpy(res, fmt, len);
634 res += len;
635 fmt += len;
636 fmtcnt -= (len - 1);
637 }
638 else {
639 /* Got a format specifier */
640 int flags = 0;
641 Py_ssize_t width = -1;
642 int prec = -1;
643 int c = '\0';
644 int fill;
645 PyObject *v = NULL;
646 PyObject *temp = NULL;
647 const char *pbuf = NULL;
648 int sign;
649 Py_ssize_t len = 0;
650 char onechar; /* For byte_converter() */
651 Py_ssize_t alloc;
652
653 fmt++;
654 if (*fmt == '%') {
655 *res++ = '%';
656 fmt++;
657 fmtcnt--;
658 continue;
659 }
660 if (*fmt == '(') {
661 const char *keystart;
662 Py_ssize_t keylen;
663 PyObject *key;
664 int pcount = 1;
665
666 if (dict == NULL) {
667 PyErr_SetString(PyExc_TypeError,
668 "format requires a mapping");
669 goto error;
670 }
671 ++fmt;
672 --fmtcnt;
673 keystart = fmt;
674 /* Skip over balanced parentheses */
675 while (pcount > 0 && --fmtcnt >= 0) {
676 if (*fmt == ')')
677 --pcount;
678 else if (*fmt == '(')
679 ++pcount;
680 fmt++;
681 }
682 keylen = fmt - keystart - 1;
683 if (fmtcnt < 0 || pcount > 0) {
684 PyErr_SetString(PyExc_ValueError,
685 "incomplete format key");
686 goto error;
687 }
688 key = PyBytes_FromStringAndSize(keystart,
689 keylen);
690 if (key == NULL)
691 goto error;
692 if (args_owned) {
693 Py_DECREF(args);
694 args_owned = 0;
695 }
696 args = PyObject_GetItem(dict, key);
697 Py_DECREF(key);
698 if (args == NULL) {
699 goto error;
700 }
701 args_owned = 1;
702 arglen = -1;
703 argidx = -2;
704 }
705
706 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
707 while (--fmtcnt >= 0) {
708 switch (c = *fmt++) {
709 case '-': flags |= F_LJUST; continue;
710 case '+': flags |= F_SIGN; continue;
711 case ' ': flags |= F_BLANK; continue;
712 case '#': flags |= F_ALT; continue;
713 case '0': flags |= F_ZERO; continue;
714 }
715 break;
716 }
717
718 /* Parse width. Example: "%10s" => width=10 */
719 if (c == '*') {
720 v = getnextarg(args, arglen, &argidx);
721 if (v == NULL)
722 goto error;
723 if (!PyLong_Check(v)) {
724 PyErr_SetString(PyExc_TypeError,
725 "* wants int");
726 goto error;
727 }
728 width = PyLong_AsSsize_t(v);
729 if (width == -1 && PyErr_Occurred())
730 goto error;
731 if (width < 0) {
732 flags |= F_LJUST;
733 width = -width;
734 }
735 if (--fmtcnt >= 0)
736 c = *fmt++;
737 }
738 else if (c >= 0 && isdigit(c)) {
739 width = c - '0';
740 while (--fmtcnt >= 0) {
741 c = Py_CHARMASK(*fmt++);
742 if (!isdigit(c))
743 break;
744 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
745 PyErr_SetString(
746 PyExc_ValueError,
747 "width too big");
748 goto error;
749 }
750 width = width*10 + (c - '0');
751 }
752 }
753
754 /* Parse precision. Example: "%.3f" => prec=3 */
755 if (c == '.') {
756 prec = 0;
757 if (--fmtcnt >= 0)
758 c = *fmt++;
759 if (c == '*') {
760 v = getnextarg(args, arglen, &argidx);
761 if (v == NULL)
762 goto error;
763 if (!PyLong_Check(v)) {
764 PyErr_SetString(
765 PyExc_TypeError,
766 "* wants int");
767 goto error;
768 }
769 prec = _PyLong_AsInt(v);
770 if (prec == -1 && PyErr_Occurred())
771 goto error;
772 if (prec < 0)
773 prec = 0;
774 if (--fmtcnt >= 0)
775 c = *fmt++;
776 }
777 else if (c >= 0 && isdigit(c)) {
778 prec = c - '0';
779 while (--fmtcnt >= 0) {
780 c = Py_CHARMASK(*fmt++);
781 if (!isdigit(c))
782 break;
783 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
784 PyErr_SetString(
785 PyExc_ValueError,
786 "prec too big");
787 goto error;
788 }
789 prec = prec*10 + (c - '0');
790 }
791 }
792 } /* prec */
793 if (fmtcnt >= 0) {
794 if (c == 'h' || c == 'l' || c == 'L') {
795 if (--fmtcnt >= 0)
796 c = *fmt++;
797 }
798 }
799 if (fmtcnt < 0) {
800 PyErr_SetString(PyExc_ValueError,
801 "incomplete format");
802 goto error;
803 }
804 v = getnextarg(args, arglen, &argidx);
805 if (v == NULL)
806 goto error;
807
808 if (fmtcnt == 0) {
809 /* last write: disable writer overallocation */
810 writer.overallocate = 0;
811 }
812
813 sign = 0;
814 fill = ' ';
815 switch (c) {
816 case 'r':
817 // %r is only for 2/3 code; 3 only code should use %a
818 case 'a':
819 temp = PyObject_ASCII(v);
820 if (temp == NULL)
821 goto error;
822 assert(PyUnicode_IS_ASCII(temp));
823 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
824 len = PyUnicode_GET_LENGTH(temp);
825 if (prec >= 0 && len > prec)
826 len = prec;
827 break;
828
829 case 's':
830 // %s is only for 2/3 code; 3 only code should use %b
831 case 'b':
832 temp = format_obj(v, &pbuf, &len);
833 if (temp == NULL)
834 goto error;
835 if (prec >= 0 && len > prec)
836 len = prec;
837 break;
838
839 case 'i':
840 case 'd':
841 case 'u':
842 case 'o':
843 case 'x':
844 case 'X':
845 if (PyLong_CheckExact(v)
846 && width == -1 && prec == -1
847 && !(flags & (F_SIGN | F_BLANK))
848 && c != 'X')
849 {
850 /* Fast path */
851 int alternate = flags & F_ALT;
852 int base;
853
854 switch(c)
855 {
856 default:
857 Py_UNREACHABLE();
858 case 'd':
859 case 'i':
860 case 'u':
861 base = 10;
862 break;
863 case 'o':
864 base = 8;
865 break;
866 case 'x':
867 case 'X':
868 base = 16;
869 break;
870 }
871
872 /* Fast path */
873 writer.min_size -= 2; /* size preallocated for "%d" */
874 res = _PyLong_FormatBytesWriter(&writer, res,
875 v, base, alternate);
876 if (res == NULL)
877 goto error;
878 continue;
879 }
880
881 temp = formatlong(v, flags, prec, c);
882 if (!temp)
883 goto error;
884 assert(PyUnicode_IS_ASCII(temp));
885 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
886 len = PyUnicode_GET_LENGTH(temp);
887 sign = 1;
888 if (flags & F_ZERO)
889 fill = '0';
890 break;
891
892 case 'e':
893 case 'E':
894 case 'f':
895 case 'F':
896 case 'g':
897 case 'G':
898 if (width == -1 && prec == -1
899 && !(flags & (F_SIGN | F_BLANK)))
900 {
901 /* Fast path */
902 writer.min_size -= 2; /* size preallocated for "%f" */
903 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
904 if (res == NULL)
905 goto error;
906 continue;
907 }
908
909 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
910 goto error;
911 pbuf = PyBytes_AS_STRING(temp);
912 len = PyBytes_GET_SIZE(temp);
913 sign = 1;
914 if (flags & F_ZERO)
915 fill = '0';
916 break;
917
918 case 'c':
919 pbuf = &onechar;
920 len = byte_converter(v, &onechar);
921 if (!len)
922 goto error;
923 if (width == -1) {
924 /* Fast path */
925 *res++ = onechar;
926 continue;
927 }
928 break;
929
930 default:
931 PyErr_Format(PyExc_ValueError,
932 "unsupported format character '%c' (0x%x) "
933 "at index %zd",
934 c, c,
935 (Py_ssize_t)(fmt - 1 - format));
936 goto error;
937 }
938
939 if (sign) {
940 if (*pbuf == '-' || *pbuf == '+') {
941 sign = *pbuf++;
942 len--;
943 }
944 else if (flags & F_SIGN)
945 sign = '+';
946 else if (flags & F_BLANK)
947 sign = ' ';
948 else
949 sign = 0;
950 }
951 if (width < len)
952 width = len;
953
954 alloc = width;
955 if (sign != 0 && len == width)
956 alloc++;
957 /* 2: size preallocated for %s */
958 if (alloc > 2) {
959 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
960 if (res == NULL)
961 goto error;
962 }
963 #ifndef NDEBUG
964 char *before = res;
965 #endif
966
967 /* Write the sign if needed */
968 if (sign) {
969 if (fill != ' ')
970 *res++ = sign;
971 if (width > len)
972 width--;
973 }
974
975 /* Write the numeric prefix for "x", "X" and "o" formats
976 if the alternate form is used.
977 For example, write "0x" for the "%#x" format. */
978 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
979 assert(pbuf[0] == '0');
980 assert(pbuf[1] == c);
981 if (fill != ' ') {
982 *res++ = *pbuf++;
983 *res++ = *pbuf++;
984 }
985 width -= 2;
986 if (width < 0)
987 width = 0;
988 len -= 2;
989 }
990
991 /* Pad left with the fill character if needed */
992 if (width > len && !(flags & F_LJUST)) {
993 memset(res, fill, width - len);
994 res += (width - len);
995 width = len;
996 }
997
998 /* If padding with spaces: write sign if needed and/or numeric
999 prefix if the alternate form is used */
1000 if (fill == ' ') {
1001 if (sign)
1002 *res++ = sign;
1003 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1004 assert(pbuf[0] == '0');
1005 assert(pbuf[1] == c);
1006 *res++ = *pbuf++;
1007 *res++ = *pbuf++;
1008 }
1009 }
1010
1011 /* Copy bytes */
1012 memcpy(res, pbuf, len);
1013 res += len;
1014
1015 /* Pad right with the fill character if needed */
1016 if (width > len) {
1017 memset(res, ' ', width - len);
1018 res += (width - len);
1019 }
1020
1021 if (dict && (argidx < arglen)) {
1022 PyErr_SetString(PyExc_TypeError,
1023 "not all arguments converted during bytes formatting");
1024 Py_XDECREF(temp);
1025 goto error;
1026 }
1027 Py_XDECREF(temp);
1028
1029 #ifndef NDEBUG
1030 /* check that we computed the exact size for this write */
1031 assert((res - before) == alloc);
1032 #endif
1033 } /* '%' */
1034
1035 /* If overallocation was disabled, ensure that it was the last
1036 write. Otherwise, we missed an optimization */
1037 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
1038 } /* until end */
1039
1040 if (argidx < arglen && !dict) {
1041 PyErr_SetString(PyExc_TypeError,
1042 "not all arguments converted during bytes formatting");
1043 goto error;
1044 }
1045
1046 if (args_owned) {
1047 Py_DECREF(args);
1048 }
1049 return _PyBytesWriter_Finish(&writer, res);
1050
1051 error:
1052 _PyBytesWriter_Dealloc(&writer);
1053 if (args_owned) {
1054 Py_DECREF(args);
1055 }
1056 return NULL;
1057 }
1058
1059 /* Unescape a backslash-escaped string. */
_PyBytes_DecodeEscape2(const char * s,Py_ssize_t len,const char * errors,int * first_invalid_escape_char,const char ** first_invalid_escape_ptr)1060 PyObject *_PyBytes_DecodeEscape2(const char *s,
1061 Py_ssize_t len,
1062 const char *errors,
1063 int *first_invalid_escape_char,
1064 const char **first_invalid_escape_ptr)
1065 {
1066 int c;
1067 char *p;
1068 const char *end;
1069 _PyBytesWriter writer;
1070
1071 _PyBytesWriter_Init(&writer);
1072
1073 p = _PyBytesWriter_Alloc(&writer, len);
1074 if (p == NULL)
1075 return NULL;
1076 writer.overallocate = 1;
1077
1078 *first_invalid_escape_char = -1;
1079 *first_invalid_escape_ptr = NULL;
1080
1081 end = s + len;
1082 while (s < end) {
1083 if (*s != '\\') {
1084 *p++ = *s++;
1085 continue;
1086 }
1087
1088 s++;
1089 if (s == end) {
1090 PyErr_SetString(PyExc_ValueError,
1091 "Trailing \\ in string");
1092 goto failed;
1093 }
1094
1095 switch (*s++) {
1096 /* XXX This assumes ASCII! */
1097 case '\n': break;
1098 case '\\': *p++ = '\\'; break;
1099 case '\'': *p++ = '\''; break;
1100 case '\"': *p++ = '\"'; break;
1101 case 'b': *p++ = '\b'; break;
1102 case 'f': *p++ = '\014'; break; /* FF */
1103 case 't': *p++ = '\t'; break;
1104 case 'n': *p++ = '\n'; break;
1105 case 'r': *p++ = '\r'; break;
1106 case 'v': *p++ = '\013'; break; /* VT */
1107 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1108 case '0': case '1': case '2': case '3':
1109 case '4': case '5': case '6': case '7':
1110 c = s[-1] - '0';
1111 if (s < end && '0' <= *s && *s <= '7') {
1112 c = (c<<3) + *s++ - '0';
1113 if (s < end && '0' <= *s && *s <= '7')
1114 c = (c<<3) + *s++ - '0';
1115 }
1116 if (c > 0377) {
1117 if (*first_invalid_escape_char == -1) {
1118 *first_invalid_escape_char = c;
1119 /* Back up 3 chars, since we've already incremented s. */
1120 *first_invalid_escape_ptr = s - 3;
1121 }
1122 }
1123 *p++ = c;
1124 break;
1125 case 'x':
1126 if (s+1 < end) {
1127 int digit1, digit2;
1128 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1129 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1130 if (digit1 < 16 && digit2 < 16) {
1131 *p++ = (unsigned char)((digit1 << 4) + digit2);
1132 s += 2;
1133 break;
1134 }
1135 }
1136 /* invalid hexadecimal digits */
1137
1138 if (!errors || strcmp(errors, "strict") == 0) {
1139 PyErr_Format(PyExc_ValueError,
1140 "invalid \\x escape at position %zd",
1141 s - 2 - (end - len));
1142 goto failed;
1143 }
1144 if (strcmp(errors, "replace") == 0) {
1145 *p++ = '?';
1146 } else if (strcmp(errors, "ignore") == 0)
1147 /* do nothing */;
1148 else {
1149 PyErr_Format(PyExc_ValueError,
1150 "decoding error; unknown "
1151 "error handling code: %.400s",
1152 errors);
1153 goto failed;
1154 }
1155 /* skip \x */
1156 if (s < end && Py_ISXDIGIT(s[0]))
1157 s++; /* and a hexdigit */
1158 break;
1159
1160 default:
1161 if (*first_invalid_escape_char == -1) {
1162 *first_invalid_escape_char = (unsigned char)s[-1];
1163 /* Back up one char, since we've already incremented s. */
1164 *first_invalid_escape_ptr = s - 1;
1165 }
1166 *p++ = '\\';
1167 s--;
1168 }
1169 }
1170
1171 return _PyBytesWriter_Finish(&writer, p);
1172
1173 failed:
1174 _PyBytesWriter_Dealloc(&writer);
1175 return NULL;
1176 }
1177
1178 // Export for binary compatibility.
_PyBytes_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,const char ** first_invalid_escape)1179 PyObject *_PyBytes_DecodeEscape(const char *s,
1180 Py_ssize_t len,
1181 const char *errors,
1182 const char **first_invalid_escape)
1183 {
1184 int first_invalid_escape_char;
1185 return _PyBytes_DecodeEscape2(
1186 s, len, errors,
1187 &first_invalid_escape_char,
1188 first_invalid_escape);
1189 }
1190
PyBytes_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,Py_ssize_t Py_UNUSED (unicode),const char * Py_UNUSED (recode_encoding))1191 PyObject *PyBytes_DecodeEscape(const char *s,
1192 Py_ssize_t len,
1193 const char *errors,
1194 Py_ssize_t Py_UNUSED(unicode),
1195 const char *Py_UNUSED(recode_encoding))
1196 {
1197 int first_invalid_escape_char;
1198 const char *first_invalid_escape_ptr;
1199 PyObject *result = _PyBytes_DecodeEscape2(s, len, errors,
1200 &first_invalid_escape_char,
1201 &first_invalid_escape_ptr);
1202 if (result == NULL)
1203 return NULL;
1204 if (first_invalid_escape_char != -1) {
1205 if (first_invalid_escape_char > 0xff) {
1206 char buf[12] = "";
1207 snprintf(buf, sizeof buf, "%o", first_invalid_escape_char);
1208 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1209 "invalid octal escape sequence '\\%s'",
1210 buf) < 0)
1211 {
1212 Py_DECREF(result);
1213 return NULL;
1214 }
1215 }
1216 else {
1217 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1218 "invalid escape sequence '\\%c'",
1219 first_invalid_escape_char) < 0)
1220 {
1221 Py_DECREF(result);
1222 return NULL;
1223 }
1224 }
1225 }
1226 return result;
1227
1228 }
1229 /* -------------------------------------------------------------------- */
1230 /* object api */
1231
1232 Py_ssize_t
PyBytes_Size(PyObject * op)1233 PyBytes_Size(PyObject *op)
1234 {
1235 if (!PyBytes_Check(op)) {
1236 PyErr_Format(PyExc_TypeError,
1237 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1238 return -1;
1239 }
1240 return Py_SIZE(op);
1241 }
1242
1243 char *
PyBytes_AsString(PyObject * op)1244 PyBytes_AsString(PyObject *op)
1245 {
1246 if (!PyBytes_Check(op)) {
1247 PyErr_Format(PyExc_TypeError,
1248 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1249 return NULL;
1250 }
1251 return ((PyBytesObject *)op)->ob_sval;
1252 }
1253
1254 int
PyBytes_AsStringAndSize(PyObject * obj,char ** s,Py_ssize_t * len)1255 PyBytes_AsStringAndSize(PyObject *obj,
1256 char **s,
1257 Py_ssize_t *len)
1258 {
1259 if (s == NULL) {
1260 PyErr_BadInternalCall();
1261 return -1;
1262 }
1263
1264 if (!PyBytes_Check(obj)) {
1265 PyErr_Format(PyExc_TypeError,
1266 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1267 return -1;
1268 }
1269
1270 *s = PyBytes_AS_STRING(obj);
1271 if (len != NULL)
1272 *len = PyBytes_GET_SIZE(obj);
1273 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1274 PyErr_SetString(PyExc_ValueError,
1275 "embedded null byte");
1276 return -1;
1277 }
1278 return 0;
1279 }
1280
1281 /* -------------------------------------------------------------------- */
1282 /* Methods */
1283
1284 #define STRINGLIB_GET_EMPTY() bytes_get_empty()
1285
1286 #include "stringlib/stringdefs.h"
1287 #define STRINGLIB_MUTABLE 0
1288
1289 #include "stringlib/fastsearch.h"
1290 #include "stringlib/count.h"
1291 #include "stringlib/find.h"
1292 #include "stringlib/join.h"
1293 #include "stringlib/partition.h"
1294 #include "stringlib/split.h"
1295 #include "stringlib/ctype.h"
1296
1297 #include "stringlib/transmogrify.h"
1298
1299 #undef STRINGLIB_GET_EMPTY
1300
1301 Py_ssize_t
_PyBytes_Find(const char * haystack,Py_ssize_t len_haystack,const char * needle,Py_ssize_t len_needle,Py_ssize_t offset)1302 _PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
1303 const char *needle, Py_ssize_t len_needle,
1304 Py_ssize_t offset)
1305 {
1306 return stringlib_find(haystack, len_haystack,
1307 needle, len_needle, offset);
1308 }
1309
1310 Py_ssize_t
_PyBytes_ReverseFind(const char * haystack,Py_ssize_t len_haystack,const char * needle,Py_ssize_t len_needle,Py_ssize_t offset)1311 _PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
1312 const char *needle, Py_ssize_t len_needle,
1313 Py_ssize_t offset)
1314 {
1315 return stringlib_rfind(haystack, len_haystack,
1316 needle, len_needle, offset);
1317 }
1318
1319 PyObject *
PyBytes_Repr(PyObject * obj,int smartquotes)1320 PyBytes_Repr(PyObject *obj, int smartquotes)
1321 {
1322 PyBytesObject* op = (PyBytesObject*) obj;
1323 Py_ssize_t i, length = Py_SIZE(op);
1324 Py_ssize_t newsize, squotes, dquotes;
1325 PyObject *v;
1326 unsigned char quote;
1327 const unsigned char *s;
1328 Py_UCS1 *p;
1329
1330 /* Compute size of output string */
1331 squotes = dquotes = 0;
1332 newsize = 3; /* b'' */
1333 s = (const unsigned char*)op->ob_sval;
1334 for (i = 0; i < length; i++) {
1335 Py_ssize_t incr = 1;
1336 switch(s[i]) {
1337 case '\'': squotes++; break;
1338 case '"': dquotes++; break;
1339 case '\\': case '\t': case '\n': case '\r':
1340 incr = 2; break; /* \C */
1341 default:
1342 if (s[i] < ' ' || s[i] >= 0x7f)
1343 incr = 4; /* \xHH */
1344 }
1345 if (newsize > PY_SSIZE_T_MAX - incr)
1346 goto overflow;
1347 newsize += incr;
1348 }
1349 quote = '\'';
1350 if (smartquotes && squotes && !dquotes)
1351 quote = '"';
1352 if (squotes && quote == '\'') {
1353 if (newsize > PY_SSIZE_T_MAX - squotes)
1354 goto overflow;
1355 newsize += squotes;
1356 }
1357
1358 v = PyUnicode_New(newsize, 127);
1359 if (v == NULL) {
1360 return NULL;
1361 }
1362 p = PyUnicode_1BYTE_DATA(v);
1363
1364 *p++ = 'b', *p++ = quote;
1365 for (i = 0; i < length; i++) {
1366 unsigned char c = op->ob_sval[i];
1367 if (c == quote || c == '\\')
1368 *p++ = '\\', *p++ = c;
1369 else if (c == '\t')
1370 *p++ = '\\', *p++ = 't';
1371 else if (c == '\n')
1372 *p++ = '\\', *p++ = 'n';
1373 else if (c == '\r')
1374 *p++ = '\\', *p++ = 'r';
1375 else if (c < ' ' || c >= 0x7f) {
1376 *p++ = '\\';
1377 *p++ = 'x';
1378 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1379 *p++ = Py_hexdigits[c & 0xf];
1380 }
1381 else
1382 *p++ = c;
1383 }
1384 *p++ = quote;
1385 assert(_PyUnicode_CheckConsistency(v, 1));
1386 return v;
1387
1388 overflow:
1389 PyErr_SetString(PyExc_OverflowError,
1390 "bytes object is too large to make repr");
1391 return NULL;
1392 }
1393
1394 static PyObject *
bytes_repr(PyObject * op)1395 bytes_repr(PyObject *op)
1396 {
1397 return PyBytes_Repr(op, 1);
1398 }
1399
1400 static PyObject *
bytes_str(PyObject * op)1401 bytes_str(PyObject *op)
1402 {
1403 if (_Py_GetConfig()->bytes_warning) {
1404 if (PyErr_WarnEx(PyExc_BytesWarning,
1405 "str() on a bytes instance", 1)) {
1406 return NULL;
1407 }
1408 }
1409 return bytes_repr(op);
1410 }
1411
1412 static Py_ssize_t
bytes_length(PyBytesObject * a)1413 bytes_length(PyBytesObject *a)
1414 {
1415 return Py_SIZE(a);
1416 }
1417
1418 /* This is also used by PyBytes_Concat() */
1419 static PyObject *
bytes_concat(PyObject * a,PyObject * b)1420 bytes_concat(PyObject *a, PyObject *b)
1421 {
1422 Py_buffer va, vb;
1423 PyObject *result = NULL;
1424
1425 va.len = -1;
1426 vb.len = -1;
1427 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1428 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1429 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1430 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
1431 goto done;
1432 }
1433
1434 /* Optimize end cases */
1435 if (va.len == 0 && PyBytes_CheckExact(b)) {
1436 result = b;
1437 Py_INCREF(result);
1438 goto done;
1439 }
1440 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1441 result = a;
1442 Py_INCREF(result);
1443 goto done;
1444 }
1445
1446 if (va.len > PY_SSIZE_T_MAX - vb.len) {
1447 PyErr_NoMemory();
1448 goto done;
1449 }
1450
1451 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1452 if (result != NULL) {
1453 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1454 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1455 }
1456
1457 done:
1458 if (va.len != -1)
1459 PyBuffer_Release(&va);
1460 if (vb.len != -1)
1461 PyBuffer_Release(&vb);
1462 return result;
1463 }
1464
1465 static PyObject *
bytes_repeat(PyBytesObject * a,Py_ssize_t n)1466 bytes_repeat(PyBytesObject *a, Py_ssize_t n)
1467 {
1468 Py_ssize_t size;
1469 PyBytesObject *op;
1470 size_t nbytes;
1471 if (n < 0)
1472 n = 0;
1473 /* watch out for overflows: the size can overflow int,
1474 * and the # of bytes needed can overflow size_t
1475 */
1476 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1477 PyErr_SetString(PyExc_OverflowError,
1478 "repeated bytes are too long");
1479 return NULL;
1480 }
1481 size = Py_SIZE(a) * n;
1482 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1483 Py_INCREF(a);
1484 return (PyObject *)a;
1485 }
1486 nbytes = (size_t)size;
1487 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1488 PyErr_SetString(PyExc_OverflowError,
1489 "repeated bytes are too long");
1490 return NULL;
1491 }
1492 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + nbytes);
1493 if (op == NULL) {
1494 return PyErr_NoMemory();
1495 }
1496 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
1497 _Py_COMP_DIAG_PUSH
1498 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
1499 op->ob_shash = -1;
1500 _Py_COMP_DIAG_POP
1501 op->ob_sval[size] = '\0';
1502
1503 _PyBytes_Repeat(op->ob_sval, size, a->ob_sval, Py_SIZE(a));
1504
1505 return (PyObject *) op;
1506 }
1507
1508 static int
bytes_contains(PyObject * self,PyObject * arg)1509 bytes_contains(PyObject *self, PyObject *arg)
1510 {
1511 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1512 }
1513
1514 static PyObject *
bytes_item(PyBytesObject * a,Py_ssize_t i)1515 bytes_item(PyBytesObject *a, Py_ssize_t i)
1516 {
1517 if (i < 0 || i >= Py_SIZE(a)) {
1518 PyErr_SetString(PyExc_IndexError, "index out of range");
1519 return NULL;
1520 }
1521 return _PyLong_FromUnsignedChar((unsigned char)a->ob_sval[i]);
1522 }
1523
1524 static int
bytes_compare_eq(PyBytesObject * a,PyBytesObject * b)1525 bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1526 {
1527 int cmp;
1528 Py_ssize_t len;
1529
1530 len = Py_SIZE(a);
1531 if (Py_SIZE(b) != len)
1532 return 0;
1533
1534 if (a->ob_sval[0] != b->ob_sval[0])
1535 return 0;
1536
1537 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1538 return (cmp == 0);
1539 }
1540
1541 static PyObject*
bytes_richcompare(PyBytesObject * a,PyBytesObject * b,int op)1542 bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
1543 {
1544 int c;
1545 Py_ssize_t len_a, len_b;
1546 Py_ssize_t min_len;
1547
1548 /* Make sure both arguments are strings. */
1549 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1550 if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
1551 if (PyUnicode_Check(a) || PyUnicode_Check(b)) {
1552 if (PyErr_WarnEx(PyExc_BytesWarning,
1553 "Comparison between bytes and string", 1))
1554 return NULL;
1555 }
1556 if (PyLong_Check(a) || PyLong_Check(b)) {
1557 if (PyErr_WarnEx(PyExc_BytesWarning,
1558 "Comparison between bytes and int", 1))
1559 return NULL;
1560 }
1561 }
1562 Py_RETURN_NOTIMPLEMENTED;
1563 }
1564 else if (a == b) {
1565 switch (op) {
1566 case Py_EQ:
1567 case Py_LE:
1568 case Py_GE:
1569 /* a byte string is equal to itself */
1570 Py_RETURN_TRUE;
1571 case Py_NE:
1572 case Py_LT:
1573 case Py_GT:
1574 Py_RETURN_FALSE;
1575 default:
1576 PyErr_BadArgument();
1577 return NULL;
1578 }
1579 }
1580 else if (op == Py_EQ || op == Py_NE) {
1581 int eq = bytes_compare_eq(a, b);
1582 eq ^= (op == Py_NE);
1583 return PyBool_FromLong(eq);
1584 }
1585 else {
1586 len_a = Py_SIZE(a);
1587 len_b = Py_SIZE(b);
1588 min_len = Py_MIN(len_a, len_b);
1589 if (min_len > 0) {
1590 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1591 if (c == 0)
1592 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1593 }
1594 else
1595 c = 0;
1596 if (c != 0)
1597 Py_RETURN_RICHCOMPARE(c, 0, op);
1598 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
1599 }
1600 }
1601
1602 static Py_hash_t
bytes_hash(PyBytesObject * a)1603 bytes_hash(PyBytesObject *a)
1604 {
1605 _Py_COMP_DIAG_PUSH
1606 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
1607 if (a->ob_shash == -1) {
1608 /* Can't fail */
1609 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
1610 }
1611 return a->ob_shash;
1612 _Py_COMP_DIAG_POP
1613 }
1614
1615 static PyObject*
bytes_subscript(PyBytesObject * self,PyObject * item)1616 bytes_subscript(PyBytesObject* self, PyObject* item)
1617 {
1618 if (_PyIndex_Check(item)) {
1619 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1620 if (i == -1 && PyErr_Occurred())
1621 return NULL;
1622 if (i < 0)
1623 i += PyBytes_GET_SIZE(self);
1624 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1625 PyErr_SetString(PyExc_IndexError,
1626 "index out of range");
1627 return NULL;
1628 }
1629 return _PyLong_FromUnsignedChar((unsigned char)self->ob_sval[i]);
1630 }
1631 else if (PySlice_Check(item)) {
1632 Py_ssize_t start, stop, step, slicelength, i;
1633 size_t cur;
1634 const char* source_buf;
1635 char* result_buf;
1636 PyObject* result;
1637
1638 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1639 return NULL;
1640 }
1641 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1642 &stop, step);
1643
1644 if (slicelength <= 0) {
1645 return PyBytes_FromStringAndSize("", 0);
1646 }
1647 else if (start == 0 && step == 1 &&
1648 slicelength == PyBytes_GET_SIZE(self) &&
1649 PyBytes_CheckExact(self)) {
1650 Py_INCREF(self);
1651 return (PyObject *)self;
1652 }
1653 else if (step == 1) {
1654 return PyBytes_FromStringAndSize(
1655 PyBytes_AS_STRING(self) + start,
1656 slicelength);
1657 }
1658 else {
1659 source_buf = PyBytes_AS_STRING(self);
1660 result = PyBytes_FromStringAndSize(NULL, slicelength);
1661 if (result == NULL)
1662 return NULL;
1663
1664 result_buf = PyBytes_AS_STRING(result);
1665 for (cur = start, i = 0; i < slicelength;
1666 cur += step, i++) {
1667 result_buf[i] = source_buf[cur];
1668 }
1669
1670 return result;
1671 }
1672 }
1673 else {
1674 PyErr_Format(PyExc_TypeError,
1675 "byte indices must be integers or slices, not %.200s",
1676 Py_TYPE(item)->tp_name);
1677 return NULL;
1678 }
1679 }
1680
1681 static int
bytes_buffer_getbuffer(PyBytesObject * self,Py_buffer * view,int flags)1682 bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
1683 {
1684 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1685 1, flags);
1686 }
1687
1688 static PySequenceMethods bytes_as_sequence = {
1689 (lenfunc)bytes_length, /*sq_length*/
1690 (binaryfunc)bytes_concat, /*sq_concat*/
1691 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1692 (ssizeargfunc)bytes_item, /*sq_item*/
1693 0, /*sq_slice*/
1694 0, /*sq_ass_item*/
1695 0, /*sq_ass_slice*/
1696 (objobjproc)bytes_contains /*sq_contains*/
1697 };
1698
1699 static PyMappingMethods bytes_as_mapping = {
1700 (lenfunc)bytes_length,
1701 (binaryfunc)bytes_subscript,
1702 0,
1703 };
1704
1705 static PyBufferProcs bytes_as_buffer = {
1706 (getbufferproc)bytes_buffer_getbuffer,
1707 NULL,
1708 };
1709
1710
1711 /*[clinic input]
1712 bytes.__bytes__
1713 Convert this value to exact type bytes.
1714 [clinic start generated code]*/
1715
1716 static PyObject *
bytes___bytes___impl(PyBytesObject * self)1717 bytes___bytes___impl(PyBytesObject *self)
1718 /*[clinic end generated code: output=63a306a9bc0caac5 input=34ec5ddba98bd6bb]*/
1719 {
1720 if (PyBytes_CheckExact(self)) {
1721 Py_INCREF(self);
1722 return (PyObject *)self;
1723 }
1724 else {
1725 return PyBytes_FromStringAndSize(self->ob_sval, Py_SIZE(self));
1726 }
1727 }
1728
1729
/* Strip modes, passed as the `striptype` argument of do_strip(), do_xstrip()
   and do_argstrip() to select which end(s) of the bytes are trimmed. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1733
1734 /*[clinic input]
1735 bytes.split
1736
1737 sep: object = None
        The delimiter according to which to split the bytes.
1739 None (the default value) means split on ASCII whitespace characters
1740 (space, tab, return, newline, formfeed, vertical tab).
1741 maxsplit: Py_ssize_t = -1
1742 Maximum number of splits to do.
1743 -1 (the default value) means no limit.
1744
1745 Return a list of the sections in the bytes, using sep as the delimiter.
1746 [clinic start generated code]*/
1747
1748 static PyObject *
bytes_split_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1749 bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1750 /*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1751 {
1752 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1753 const char *s = PyBytes_AS_STRING(self), *sub;
1754 Py_buffer vsub;
1755 PyObject *list;
1756
1757 if (maxsplit < 0)
1758 maxsplit = PY_SSIZE_T_MAX;
1759 if (sep == Py_None)
1760 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1761 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1762 return NULL;
1763 sub = vsub.buf;
1764 n = vsub.len;
1765
1766 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1767 PyBuffer_Release(&vsub);
1768 return list;
1769 }
1770
1771 /*[clinic input]
1772 bytes.partition
1773
1774 sep: Py_buffer
1775 /
1776
1777 Partition the bytes into three parts using the given separator.
1778
1779 This will search for the separator sep in the bytes. If the separator is found,
1780 returns a 3-tuple containing the part before the separator, the separator
1781 itself, and the part after it.
1782
1783 If the separator is not found, returns a 3-tuple containing the original bytes
1784 object and two empty bytes objects.
1785 [clinic start generated code]*/
1786
1787 static PyObject *
bytes_partition_impl(PyBytesObject * self,Py_buffer * sep)1788 bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1789 /*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
1790 {
1791 return stringlib_partition(
1792 (PyObject*) self,
1793 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1794 sep->obj, (const char *)sep->buf, sep->len
1795 );
1796 }
1797
1798 /*[clinic input]
1799 bytes.rpartition
1800
1801 sep: Py_buffer
1802 /
1803
1804 Partition the bytes into three parts using the given separator.
1805
1806 This will search for the separator sep in the bytes, starting at the end. If
1807 the separator is found, returns a 3-tuple containing the part before the
1808 separator, the separator itself, and the part after it.
1809
1810 If the separator is not found, returns a 3-tuple containing two empty bytes
1811 objects and the original bytes object.
1812 [clinic start generated code]*/
1813
1814 static PyObject *
bytes_rpartition_impl(PyBytesObject * self,Py_buffer * sep)1815 bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1816 /*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
1817 {
1818 return stringlib_rpartition(
1819 (PyObject*) self,
1820 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1821 sep->obj, (const char *)sep->buf, sep->len
1822 );
1823 }
1824
1825 /*[clinic input]
1826 bytes.rsplit = bytes.split
1827
1828 Return a list of the sections in the bytes, using sep as the delimiter.
1829
1830 Splitting is done starting at the end of the bytes and working to the front.
1831 [clinic start generated code]*/
1832
1833 static PyObject *
bytes_rsplit_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1834 bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1835 /*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
1836 {
1837 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1838 const char *s = PyBytes_AS_STRING(self), *sub;
1839 Py_buffer vsub;
1840 PyObject *list;
1841
1842 if (maxsplit < 0)
1843 maxsplit = PY_SSIZE_T_MAX;
1844 if (sep == Py_None)
1845 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1846 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1847 return NULL;
1848 sub = vsub.buf;
1849 n = vsub.len;
1850
1851 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1852 PyBuffer_Release(&vsub);
1853 return list;
1854 }
1855
1856
1857 /*[clinic input]
1858 bytes.join
1859
1860 iterable_of_bytes: object
1861 /
1862
1863 Concatenate any number of bytes objects.
1864
1865 The bytes whose method is called is inserted in between each pair.
1866
1867 The result is returned as a new bytes object.
1868
1869 Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1870 [clinic start generated code]*/
1871
1872 static PyObject *
bytes_join(PyBytesObject * self,PyObject * iterable_of_bytes)1873 bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1874 /*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
1875 {
1876 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1877 }
1878
1879 PyObject *
_PyBytes_Join(PyObject * sep,PyObject * x)1880 _PyBytes_Join(PyObject *sep, PyObject *x)
1881 {
1882 assert(sep != NULL && PyBytes_Check(sep));
1883 assert(x != NULL);
1884 return bytes_join((PyBytesObject*)sep, x);
1885 }
1886
1887 static PyObject *
bytes_find(PyBytesObject * self,PyObject * args)1888 bytes_find(PyBytesObject *self, PyObject *args)
1889 {
1890 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1891 }
1892
1893 static PyObject *
bytes_index(PyBytesObject * self,PyObject * args)1894 bytes_index(PyBytesObject *self, PyObject *args)
1895 {
1896 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1897 }
1898
1899
1900 static PyObject *
bytes_rfind(PyBytesObject * self,PyObject * args)1901 bytes_rfind(PyBytesObject *self, PyObject *args)
1902 {
1903 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1904 }
1905
1906
1907 static PyObject *
bytes_rindex(PyBytesObject * self,PyObject * args)1908 bytes_rindex(PyBytesObject *self, PyObject *args)
1909 {
1910 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1911 }
1912
1913
1914 Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject * self,int striptype,PyObject * sepobj)1915 do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
1916 {
1917 Py_buffer vsep;
1918 const char *s = PyBytes_AS_STRING(self);
1919 Py_ssize_t len = PyBytes_GET_SIZE(self);
1920 char *sep;
1921 Py_ssize_t seplen;
1922 Py_ssize_t i, j;
1923
1924 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
1925 return NULL;
1926 sep = vsep.buf;
1927 seplen = vsep.len;
1928
1929 i = 0;
1930 if (striptype != RIGHTSTRIP) {
1931 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1932 i++;
1933 }
1934 }
1935
1936 j = len;
1937 if (striptype != LEFTSTRIP) {
1938 do {
1939 j--;
1940 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1941 j++;
1942 }
1943
1944 PyBuffer_Release(&vsep);
1945
1946 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1947 Py_INCREF(self);
1948 return (PyObject*)self;
1949 }
1950 else
1951 return PyBytes_FromStringAndSize(s+i, j-i);
1952 }
1953
1954
1955 Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject * self,int striptype)1956 do_strip(PyBytesObject *self, int striptype)
1957 {
1958 const char *s = PyBytes_AS_STRING(self);
1959 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1960
1961 i = 0;
1962 if (striptype != RIGHTSTRIP) {
1963 while (i < len && Py_ISSPACE(s[i])) {
1964 i++;
1965 }
1966 }
1967
1968 j = len;
1969 if (striptype != LEFTSTRIP) {
1970 do {
1971 j--;
1972 } while (j >= i && Py_ISSPACE(s[j]));
1973 j++;
1974 }
1975
1976 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1977 Py_INCREF(self);
1978 return (PyObject*)self;
1979 }
1980 else
1981 return PyBytes_FromStringAndSize(s+i, j-i);
1982 }
1983
1984
1985 Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject * self,int striptype,PyObject * bytes)1986 do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
1987 {
1988 if (bytes != Py_None) {
1989 return do_xstrip(self, striptype, bytes);
1990 }
1991 return do_strip(self, striptype);
1992 }
1993
1994 /*[clinic input]
1995 bytes.strip
1996
1997 bytes: object = None
1998 /
1999
2000 Strip leading and trailing bytes contained in the argument.
2001
2002 If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2003 [clinic start generated code]*/
2004
2005 static PyObject *
bytes_strip_impl(PyBytesObject * self,PyObject * bytes)2006 bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
2007 /*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
2008 {
2009 return do_argstrip(self, BOTHSTRIP, bytes);
2010 }
2011
2012 /*[clinic input]
2013 bytes.lstrip
2014
2015 bytes: object = None
2016 /
2017
2018 Strip leading bytes contained in the argument.
2019
2020 If the argument is omitted or None, strip leading ASCII whitespace.
2021 [clinic start generated code]*/
2022
2023 static PyObject *
bytes_lstrip_impl(PyBytesObject * self,PyObject * bytes)2024 bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
2025 /*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
2026 {
2027 return do_argstrip(self, LEFTSTRIP, bytes);
2028 }
2029
2030 /*[clinic input]
2031 bytes.rstrip
2032
2033 bytes: object = None
2034 /
2035
2036 Strip trailing bytes contained in the argument.
2037
2038 If the argument is omitted or None, strip trailing ASCII whitespace.
2039 [clinic start generated code]*/
2040
2041 static PyObject *
bytes_rstrip_impl(PyBytesObject * self,PyObject * bytes)2042 bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2043 /*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2044 {
2045 return do_argstrip(self, RIGHTSTRIP, bytes);
2046 }
2047
2048
2049 static PyObject *
bytes_count(PyBytesObject * self,PyObject * args)2050 bytes_count(PyBytesObject *self, PyObject *args)
2051 {
2052 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2053 }
2054
2055
2056 /*[clinic input]
2057 bytes.translate
2058
2059 table: object
2060 Translation table, which must be a bytes object of length 256.
2061 /
2062 delete as deletechars: object(c_default="NULL") = b''
2063
2064 Return a copy with each character mapped by the given translation table.
2065
2066 All characters occurring in the optional argument delete are removed.
2067 The remaining characters are mapped through the given translation table.
2068 [clinic start generated code]*/
2069
static PyObject *
bytes_translate_impl(PyBytesObject *self, PyObject *table,
                     PyObject *deletechars)
/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
{
    /* Map every byte of `self` through a 256-entry translation table,
     * dropping the bytes listed in `deletechars`.
     *
     * `table` may be a bytes object, None (identity mapping) or any object
     * exporting a simple buffer; it must be exactly 256 bytes long, else
     * ValueError is set.  `deletechars` may be NULL (nothing deleted), a
     * bytes object or a buffer exporter.
     *
     * Returns `self` itself when no byte changed and `self` is an exact
     * bytes object, otherwise a new bytes object; NULL on error.
     */
    const char *input;
    char *output;
    /* Both views start out zeroed: the code below calls PyBuffer_Release()
       on them even on paths where no buffer was acquired. */
    Py_buffer table_view = {NULL, NULL};
    Py_buffer del_table_view = {NULL, NULL};
    const char *table_chars;
    Py_ssize_t i, c, changed = 0;
    PyObject *input_obj = (PyObject*)self;
    const char *output_start, *del_table_chars=NULL;
    Py_ssize_t inlen, tablen, dellen = 0;
    PyObject *result;
    int trans_table[256];

    /* Resolve the translation table: bytes, None, or buffer exporter. */
    if (PyBytes_Check(table)) {
        table_chars = PyBytes_AS_STRING(table);
        tablen = PyBytes_GET_SIZE(table);
    }
    else if (table == Py_None) {
        /* NULL table_chars selects the identity mapping further down. */
        table_chars = NULL;
        tablen = 256;
    }
    else {
        if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
            return NULL;
        table_chars = table_view.buf;
        tablen = table_view.len;
    }

    if (tablen != 256) {
        PyErr_SetString(PyExc_ValueError,
                        "translation table must be 256 characters long");
        PyBuffer_Release(&table_view);
        return NULL;
    }

    /* Resolve the set of bytes to delete. */
    if (deletechars != NULL) {
        if (PyBytes_Check(deletechars)) {
            del_table_chars = PyBytes_AS_STRING(deletechars);
            dellen = PyBytes_GET_SIZE(deletechars);
        }
        else {
            if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
                PyBuffer_Release(&table_view);
                return NULL;
            }
            del_table_chars = del_table_view.buf;
            dellen = del_table_view.len;
        }
    }
    else {
        del_table_chars = NULL;
        dellen = 0;
    }

    /* The output can never be longer than the input, so allocate inlen
       bytes up front and shrink afterwards if bytes were deleted. */
    inlen = PyBytes_GET_SIZE(input_obj);
    result = PyBytes_FromStringAndSize((char *)NULL, inlen);
    if (result == NULL) {
        PyBuffer_Release(&del_table_view);
        PyBuffer_Release(&table_view);
        return NULL;
    }
    output_start = output = PyBytes_AS_STRING(result);
    input = PyBytes_AS_STRING(input_obj);

    if (dellen == 0 && table_chars != NULL) {
        /* If no deletions are required, use faster code */
        for (i = inlen; --i >= 0; ) {
            c = Py_CHARMASK(*input++);
            if (Py_CHARMASK((*output++ = table_chars[c])) != c)
                changed = 1;
        }
        /* Nothing changed: hand back the input object instead of the copy. */
        if (!changed && PyBytes_CheckExact(input_obj)) {
            Py_INCREF(input_obj);
            Py_DECREF(result);
            result = input_obj;
        }
        PyBuffer_Release(&del_table_view);
        PyBuffer_Release(&table_view);
        return result;
    }

    /* General path: build an int table so that -1 can mark deleted bytes. */
    if (table_chars == NULL) {
        for (i = 0; i < 256; i++)
            trans_table[i] = Py_CHARMASK(i);
    } else {
        for (i = 0; i < 256; i++)
            trans_table[i] = Py_CHARMASK(table_chars[i]);
    }
    PyBuffer_Release(&table_view);

    for (i = 0; i < dellen; i++)
        trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
    PyBuffer_Release(&del_table_view);

    /* A byte mapped to -1 is skipped (nothing is written for it); any
       deleted or modified byte sets `changed`. */
    for (i = inlen; --i >= 0; ) {
        c = Py_CHARMASK(*input++);
        if (trans_table[c] != -1)
            if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
                continue;
        changed = 1;
    }
    if (!changed && PyBytes_CheckExact(input_obj)) {
        Py_DECREF(result);
        Py_INCREF(input_obj);
        return input_obj;
    }
    /* Fix the size of the resulting byte string */
    if (inlen > 0)
        _PyBytes_Resize(&result, output - output_start);
    return result;
}
2185
2186
2187 /*[clinic input]
2188
2189 @staticmethod
2190 bytes.maketrans
2191
2192 frm: Py_buffer
2193 to: Py_buffer
2194 /
2195
2196 Return a translation table useable for the bytes or bytearray translate method.
2197
2198 The returned table will be one where each byte in frm is mapped to the byte at
2199 the same position in to.
2200
2201 The bytes objects frm and to must be of the same length.
2202 [clinic start generated code]*/
2203
2204 static PyObject *
bytes_maketrans_impl(Py_buffer * frm,Py_buffer * to)2205 bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2206 /*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
2207 {
2208 return _Py_bytes_maketrans(frm, to);
2209 }
2210
2211
2212 /*[clinic input]
2213 bytes.replace
2214
2215 old: Py_buffer
2216 new: Py_buffer
2217 count: Py_ssize_t = -1
2218 Maximum number of occurrences to replace.
2219 -1 (the default value) means replace all occurrences.
2220 /
2221
2222 Return a copy with all occurrences of substring old replaced by new.
2223
2224 If the optional argument count is given, only the first count occurrences are
2225 replaced.
2226 [clinic start generated code]*/
2227
2228 static PyObject *
bytes_replace_impl(PyBytesObject * self,Py_buffer * old,Py_buffer * new,Py_ssize_t count)2229 bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2230 Py_ssize_t count)
2231 /*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
2232 {
2233 return stringlib_replace((PyObject *)self,
2234 (const char *)old->buf, old->len,
2235 (const char *)new->buf, new->len, count);
2236 }
2237
2238 /** End DALKE **/
2239
2240 /*[clinic input]
2241 bytes.removeprefix as bytes_removeprefix
2242
2243 prefix: Py_buffer
2244 /
2245
2246 Return a bytes object with the given prefix string removed if present.
2247
2248 If the bytes starts with the prefix string, return bytes[len(prefix):].
2249 Otherwise, return a copy of the original bytes.
2250 [clinic start generated code]*/
2251
2252 static PyObject *
bytes_removeprefix_impl(PyBytesObject * self,Py_buffer * prefix)2253 bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2254 /*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2255 {
2256 const char *self_start = PyBytes_AS_STRING(self);
2257 Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2258 const char *prefix_start = prefix->buf;
2259 Py_ssize_t prefix_len = prefix->len;
2260
2261 if (self_len >= prefix_len
2262 && prefix_len > 0
2263 && memcmp(self_start, prefix_start, prefix_len) == 0)
2264 {
2265 return PyBytes_FromStringAndSize(self_start + prefix_len,
2266 self_len - prefix_len);
2267 }
2268
2269 if (PyBytes_CheckExact(self)) {
2270 Py_INCREF(self);
2271 return (PyObject *)self;
2272 }
2273
2274 return PyBytes_FromStringAndSize(self_start, self_len);
2275 }
2276
2277 /*[clinic input]
2278 bytes.removesuffix as bytes_removesuffix
2279
2280 suffix: Py_buffer
2281 /
2282
2283 Return a bytes object with the given suffix string removed if present.
2284
2285 If the bytes ends with the suffix string and that suffix is not empty,
return bytes[:-len(suffix)]. Otherwise, return a copy of the original
2287 bytes.
2288 [clinic start generated code]*/
2289
2290 static PyObject *
bytes_removesuffix_impl(PyBytesObject * self,Py_buffer * suffix)2291 bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2292 /*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2293 {
2294 const char *self_start = PyBytes_AS_STRING(self);
2295 Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2296 const char *suffix_start = suffix->buf;
2297 Py_ssize_t suffix_len = suffix->len;
2298
2299 if (self_len >= suffix_len
2300 && suffix_len > 0
2301 && memcmp(self_start + self_len - suffix_len,
2302 suffix_start, suffix_len) == 0)
2303 {
2304 return PyBytes_FromStringAndSize(self_start,
2305 self_len - suffix_len);
2306 }
2307
2308 if (PyBytes_CheckExact(self)) {
2309 Py_INCREF(self);
2310 return (PyObject *)self;
2311 }
2312
2313 return PyBytes_FromStringAndSize(self_start, self_len);
2314 }
2315
2316 static PyObject *
bytes_startswith(PyBytesObject * self,PyObject * args)2317 bytes_startswith(PyBytesObject *self, PyObject *args)
2318 {
2319 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2320 }
2321
2322 static PyObject *
bytes_endswith(PyBytesObject * self,PyObject * args)2323 bytes_endswith(PyBytesObject *self, PyObject *args)
2324 {
2325 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2326 }
2327
2328
2329 /*[clinic input]
2330 bytes.decode
2331
2332 encoding: str(c_default="NULL") = 'utf-8'
2333 The encoding with which to decode the bytes.
2334 errors: str(c_default="NULL") = 'strict'
2335 The error handling scheme to use for the handling of decoding errors.
2336 The default is 'strict' meaning that decoding errors raise a
2337 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2338 as well as any other name registered with codecs.register_error that
2339 can handle UnicodeDecodeErrors.
2340
2341 Decode the bytes using the codec registered for encoding.
2342 [clinic start generated code]*/
2343
2344 static PyObject *
bytes_decode_impl(PyBytesObject * self,const char * encoding,const char * errors)2345 bytes_decode_impl(PyBytesObject *self, const char *encoding,
2346 const char *errors)
2347 /*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2348 {
2349 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2350 }
2351
2352
2353 /*[clinic input]
2354 bytes.splitlines
2355
2356 keepends: bool(accept={int}) = False
2357
2358 Return a list of the lines in the bytes, breaking at line boundaries.
2359
2360 Line breaks are not included in the resulting list unless keepends is given and
2361 true.
2362 [clinic start generated code]*/
2363
2364 static PyObject *
bytes_splitlines_impl(PyBytesObject * self,int keepends)2365 bytes_splitlines_impl(PyBytesObject *self, int keepends)
2366 /*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
2367 {
2368 return stringlib_splitlines(
2369 (PyObject*) self, PyBytes_AS_STRING(self),
2370 PyBytes_GET_SIZE(self), keepends
2371 );
2372 }
2373
2374 /*[clinic input]
2375 @classmethod
2376 bytes.fromhex
2377
2378 string: unicode
2379 /
2380
2381 Create a bytes object from a string of hexadecimal numbers.
2382
2383 Spaces between two numbers are accepted.
2384 Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2385 [clinic start generated code]*/
2386
2387 static PyObject *
bytes_fromhex_impl(PyTypeObject * type,PyObject * string)2388 bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2389 /*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
2390 {
2391 PyObject *result = _PyBytes_FromHex(string, 0);
2392 if (type != &PyBytes_Type && result != NULL) {
2393 Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
2394 }
2395 return result;
2396 }
2397
/* Decode a str made of pairs of hexadecimal digits into a new object built
   with a _PyBytesWriter.

   string:        the text to decode; must be a str (asserted).
   use_bytearray: stored in writer.use_bytearray, which selects a bytearray
                  instead of a bytes object as the writer's backing buffer.

   Runs of whitespace (Py_ISSPACE) are skipped before each pair of digits.
   Returns the finished object, or NULL with an exception set.  A non-ASCII
   character or a non-hexadecimal character raises ValueError reporting the
   offending position. */
PyObject*
_PyBytes_FromHex(PyObject *string, int use_bytearray)
{
    char *buf;
    Py_ssize_t hexlen, invalid_char;
    unsigned int top, bot;
    const Py_UCS1 *str, *end;
    _PyBytesWriter writer;

    _PyBytesWriter_Init(&writer);
    writer.use_bytearray = use_bytearray;

    assert(PyUnicode_Check(string));
    if (PyUnicode_READY(string))
        return NULL;
    hexlen = PyUnicode_GET_LENGTH(string);

    if (!PyUnicode_IS_ASCII(string)) {
        const void *data = PyUnicode_DATA(string);
        unsigned int kind = PyUnicode_KIND(string);
        Py_ssize_t i;

        /* search for the first non-ASCII character */
        for (i = 0; i < hexlen; i++) {
            if (PyUnicode_READ(kind, data, i) >= 128)
                break;
        }
        /* Report that character's index through the common error path. */
        invalid_char = i;
        goto error;
    }

    assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
    str = PyUnicode_1BYTE_DATA(string);

    /* This overestimates if there are spaces */
    buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
    if (buf == NULL)
        return NULL;

    end = str + hexlen;
    while (str < end) {
        /* skip over spaces in the input */
        if (Py_ISSPACE(*str)) {
            do {
                str++;
            } while (Py_ISSPACE(*str));
            /* Only trailing whitespace was left: decoding is complete. */
            if (str >= end)
                break;
        }

        /* High nibble: anything that is not a hex digit is an error. */
        top = _PyLong_DigitValue[*str];
        if (top >= 16) {
            invalid_char = str - PyUnicode_1BYTE_DATA(string);
            goto error;
        }
        str++;

        /* Low nibble.  NOTE(review): with an odd number of digits this reads
           *str at `end`; presumably that is the string's terminating NUL,
           which is rejected as a non-digit -- confirm. */
        bot = _PyLong_DigitValue[*str];
        if (bot >= 16) {
            invalid_char = str - PyUnicode_1BYTE_DATA(string);
            goto error;
        }
        str++;

        *buf++ = (unsigned char)((top << 4) + bot);
    }

    return _PyBytesWriter_Finish(&writer, buf);

  error:
    /* invalid_char holds the index of the rejected character. */
    PyErr_Format(PyExc_ValueError,
                 "non-hexadecimal number found in "
                 "fromhex() arg at position %zd", invalid_char);
    _PyBytesWriter_Dealloc(&writer);
    return NULL;
}
2474
2475 /*[clinic input]
2476 bytes.hex
2477
2478 sep: object = NULL
2479 An optional single character or byte to separate hex bytes.
2480 bytes_per_sep: int = 1
2481 How many bytes between separators. Positive values count from the
2482 right, negative values count from the left.
2483
2484 Create a string of hexadecimal numbers from a bytes object.
2485
2486 Example:
2487 >>> value = b'\xb9\x01\xef'
2488 >>> value.hex()
2489 'b901ef'
2490 >>> value.hex(':')
2491 'b9:01:ef'
2492 >>> value.hex(':', 2)
2493 'b9:01ef'
2494 >>> value.hex(':', -2)
2495 'b901:ef'
2496 [clinic start generated code]*/
2497
2498 static PyObject *
bytes_hex_impl(PyBytesObject * self,PyObject * sep,int bytes_per_sep)2499 bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2500 /*[clinic end generated code: output=1f134da504064139 input=1a21282b1f1ae595]*/
2501 {
2502 const char *argbuf = PyBytes_AS_STRING(self);
2503 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2504 return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
2505 }
2506
2507 static PyObject *
bytes_getnewargs(PyBytesObject * v,PyObject * Py_UNUSED (ignored))2508 bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
2509 {
2510 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
2511 }
2512
2513
/* Method table installed as PyBytes_Type.tp_methods.

   Entries come in two forms: explicit {name, function, flags, doc} records
   for functions defined by hand (stringlib_* helpers and the bytes_*
   wrappers in this file), and *_METHODDEF macros, which expand to complete
   records including their trailing comma.  The table ends with a
   {NULL, NULL} sentinel. */
static PyMethodDef
bytes_methods[] = {
    {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
    BYTES___BYTES___METHODDEF
    {"capitalize", stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    STRINGLIB_CENTER_METHODDEF
    {"count", (PyCFunction)bytes_count, METH_VARARGS,
     _Py_count__doc__},
    BYTES_DECODE_METHODDEF
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
     _Py_endswith__doc__},
    STRINGLIB_EXPANDTABS_METHODDEF
    {"find", (PyCFunction)bytes_find, METH_VARARGS,
     _Py_find__doc__},
    BYTES_FROMHEX_METHODDEF
    BYTES_HEX_METHODDEF
    {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
    {"isalnum", stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isascii", stringlib_isascii, METH_NOARGS,
     _Py_isascii__doc__},
    {"isdigit", stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    BYTES_JOIN_METHODDEF
    STRINGLIB_LJUST_METHODDEF
    {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    BYTES_LSTRIP_METHODDEF
    BYTES_MAKETRANS_METHODDEF
    BYTES_PARTITION_METHODDEF
    BYTES_REPLACE_METHODDEF
    BYTES_REMOVEPREFIX_METHODDEF
    BYTES_REMOVESUFFIX_METHODDEF
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
    STRINGLIB_RJUST_METHODDEF
    BYTES_RPARTITION_METHODDEF
    BYTES_RSPLIT_METHODDEF
    BYTES_RSTRIP_METHODDEF
    BYTES_SPLIT_METHODDEF
    BYTES_SPLITLINES_METHODDEF
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
     _Py_startswith__doc__},
    BYTES_STRIP_METHODDEF
    {"swapcase", stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
    BYTES_TRANSLATE_METHODDEF
    {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    STRINGLIB_ZFILL_METHODDEF
    {NULL, NULL} /* sentinel */
};
2576
2577 static PyObject *
bytes_mod(PyObject * self,PyObject * arg)2578 bytes_mod(PyObject *self, PyObject *arg)
2579 {
2580 if (!PyBytes_Check(self)) {
2581 Py_RETURN_NOTIMPLEMENTED;
2582 }
2583 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2584 arg, 0);
2585 }
2586
2587 static PyNumberMethods bytes_as_number = {
2588 0, /*nb_add*/
2589 0, /*nb_subtract*/
2590 0, /*nb_multiply*/
2591 bytes_mod, /*nb_remainder*/
2592 };
2593
/* Forward declaration: defined further down in this file and called by
   bytes_new_impl() when the requested type is not exactly PyBytes_Type. */
static PyObject *
bytes_subtype_new(PyTypeObject *, PyObject *);
2596
2597 /*[clinic input]
2598 @classmethod
2599 bytes.__new__ as bytes_new
2600
2601 source as x: object = NULL
2602 encoding: str = NULL
2603 errors: str = NULL
2604
2605 [clinic start generated code]*/
2606
static PyObject *
bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
               const char *errors)
/*[clinic end generated code: output=1e0c471be311a425 input=f0a966d19b7262b4]*/
{
    /* Constructor behind bytes(...).  The source object `x` is examined in a
       fixed order -- missing, str with encoding, object with __bytes__,
       index-like integer, anything else -- and the first matching branch
       produces a bytes object in `bytes`.  If `type` is not exactly
       PyBytes_Type, that object is finally copied into an instance of
       `type` by bytes_subtype_new().  Returns NULL with an exception set on
       failure. */
    PyObject *bytes;
    PyObject *func;
    Py_ssize_t size;

    if (x == NULL) {
        /* No source: encoding/errors make no sense on their own. */
        if (encoding != NULL || errors != NULL) {
            PyErr_SetString(PyExc_TypeError,
                            encoding != NULL ?
                            "encoding without a string argument" :
                            "errors without a string argument");
            return NULL;
        }
        bytes = PyBytes_FromStringAndSize(NULL, 0);
    }
    else if (encoding != NULL) {
        /* Encode via the codec registry */
        if (!PyUnicode_Check(x)) {
            PyErr_SetString(PyExc_TypeError,
                            "encoding without a string argument");
            return NULL;
        }
        bytes = PyUnicode_AsEncodedString(x, encoding, errors);
    }
    else if (errors != NULL) {
        /* errors given without encoding: always an error; only the message
           depends on whether x is a str. */
        PyErr_SetString(PyExc_TypeError,
                        PyUnicode_Check(x) ?
                        "string argument without an encoding" :
                        "errors without a string argument");
        return NULL;
    }
    /* We'd like to call PyObject_Bytes here, but we need to check for an
       integer argument before deferring to PyBytes_FromObject, something
       PyObject_Bytes doesn't do. */
    else if ((func = _PyObject_LookupSpecial(x, &_Py_ID(__bytes__))) != NULL) {
        bytes = _PyObject_CallNoArgs(func);
        Py_DECREF(func);
        if (bytes == NULL)
            return NULL;
        /* __bytes__ must return bytes (or a subclass). */
        if (!PyBytes_Check(bytes)) {
            PyErr_Format(PyExc_TypeError,
                         "__bytes__ returned non-bytes (type %.200s)",
                         Py_TYPE(bytes)->tp_name);
            Py_DECREF(bytes);
            return NULL;
        }
    }
    /* The lookup returned NULL: propagate if that was due to an error
       rather than the attribute simply being absent. */
    else if (PyErr_Occurred())
        return NULL;
    else if (PyUnicode_Check(x)) {
        PyErr_SetString(PyExc_TypeError,
                        "string argument without an encoding");
        return NULL;
    }
    /* Is it an integer? */
    else if (_PyIndex_Check(x)) {
        size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
        if (size == -1 && PyErr_Occurred()) {
            /* Only a TypeError is swallowed; in that case x is retried as a
               generic source object.  Any other exception propagates. */
            if (!PyErr_ExceptionMatches(PyExc_TypeError))
                return NULL;
            PyErr_Clear(); /* fall through */
            bytes = PyBytes_FromObject(x);
        }
        else {
            if (size < 0) {
                PyErr_SetString(PyExc_ValueError, "negative count");
                return NULL;
            }
            /* bytes(n): an object of length `size` (the docstring for bytes
               describes it as initialized with null bytes). */
            bytes = _PyBytes_FromSize(size, 1);
        }
    }
    else {
        bytes = PyBytes_FromObject(x);
    }

    /* For a subclass, replace the plain result with an instance of `type`;
       Py_SETREF releases the intermediate object. */
    if (bytes != NULL && type != &PyBytes_Type) {
        Py_SETREF(bytes, bytes_subtype_new(type, bytes));
    }

    return bytes;
}
2692
2693 static PyObject*
_PyBytes_FromBuffer(PyObject * x)2694 _PyBytes_FromBuffer(PyObject *x)
2695 {
2696 PyObject *new;
2697 Py_buffer view;
2698
2699 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2700 return NULL;
2701
2702 new = PyBytes_FromStringAndSize(NULL, view.len);
2703 if (!new)
2704 goto fail;
2705 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2706 &view, view.len, 'C') < 0)
2707 goto fail;
2708 PyBuffer_Release(&view);
2709 return new;
2710
2711 fail:
2712 Py_XDECREF(new);
2713 PyBuffer_Release(&view);
2714 return NULL;
2715 }
2716
2717 static PyObject*
_PyBytes_FromList(PyObject * x)2718 _PyBytes_FromList(PyObject *x)
2719 {
2720 Py_ssize_t i, size = PyList_GET_SIZE(x);
2721 Py_ssize_t value;
2722 char *str;
2723 PyObject *item;
2724 _PyBytesWriter writer;
2725
2726 _PyBytesWriter_Init(&writer);
2727 str = _PyBytesWriter_Alloc(&writer, size);
2728 if (str == NULL)
2729 return NULL;
2730 writer.overallocate = 1;
2731 size = writer.allocated;
2732
2733 for (i = 0; i < PyList_GET_SIZE(x); i++) {
2734 item = PyList_GET_ITEM(x, i);
2735 Py_INCREF(item);
2736 value = PyNumber_AsSsize_t(item, NULL);
2737 Py_DECREF(item);
2738 if (value == -1 && PyErr_Occurred())
2739 goto error;
2740
2741 if (value < 0 || value >= 256) {
2742 PyErr_SetString(PyExc_ValueError,
2743 "bytes must be in range(0, 256)");
2744 goto error;
2745 }
2746
2747 if (i >= size) {
2748 str = _PyBytesWriter_Resize(&writer, str, size+1);
2749 if (str == NULL)
2750 return NULL;
2751 size = writer.allocated;
2752 }
2753 *str++ = (char) value;
2754 }
2755 return _PyBytesWriter_Finish(&writer, str);
2756
2757 error:
2758 _PyBytesWriter_Dealloc(&writer);
2759 return NULL;
2760 }
2761
2762 static PyObject*
_PyBytes_FromTuple(PyObject * x)2763 _PyBytes_FromTuple(PyObject *x)
2764 {
2765 PyObject *bytes;
2766 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2767 Py_ssize_t value;
2768 char *str;
2769 PyObject *item;
2770
2771 bytes = PyBytes_FromStringAndSize(NULL, size);
2772 if (bytes == NULL)
2773 return NULL;
2774 str = ((PyBytesObject *)bytes)->ob_sval;
2775
2776 for (i = 0; i < size; i++) {
2777 item = PyTuple_GET_ITEM(x, i);
2778 value = PyNumber_AsSsize_t(item, NULL);
2779 if (value == -1 && PyErr_Occurred())
2780 goto error;
2781
2782 if (value < 0 || value >= 256) {
2783 PyErr_SetString(PyExc_ValueError,
2784 "bytes must be in range(0, 256)");
2785 goto error;
2786 }
2787 *str++ = (char) value;
2788 }
2789 return bytes;
2790
2791 error:
2792 Py_DECREF(bytes);
2793 return NULL;
2794 }
2795
2796 static PyObject *
_PyBytes_FromIterator(PyObject * it,PyObject * x)2797 _PyBytes_FromIterator(PyObject *it, PyObject *x)
2798 {
2799 char *str;
2800 Py_ssize_t i, size;
2801 _PyBytesWriter writer;
2802
2803 /* For iterator version, create a bytes object and resize as needed */
2804 size = PyObject_LengthHint(x, 64);
2805 if (size == -1 && PyErr_Occurred())
2806 return NULL;
2807
2808 _PyBytesWriter_Init(&writer);
2809 str = _PyBytesWriter_Alloc(&writer, size);
2810 if (str == NULL)
2811 return NULL;
2812 writer.overallocate = 1;
2813 size = writer.allocated;
2814
2815 /* Run the iterator to exhaustion */
2816 for (i = 0; ; i++) {
2817 PyObject *item;
2818 Py_ssize_t value;
2819
2820 /* Get the next item */
2821 item = PyIter_Next(it);
2822 if (item == NULL) {
2823 if (PyErr_Occurred())
2824 goto error;
2825 break;
2826 }
2827
2828 /* Interpret it as an int (__index__) */
2829 value = PyNumber_AsSsize_t(item, NULL);
2830 Py_DECREF(item);
2831 if (value == -1 && PyErr_Occurred())
2832 goto error;
2833
2834 /* Range check */
2835 if (value < 0 || value >= 256) {
2836 PyErr_SetString(PyExc_ValueError,
2837 "bytes must be in range(0, 256)");
2838 goto error;
2839 }
2840
2841 /* Append the byte */
2842 if (i >= size) {
2843 str = _PyBytesWriter_Resize(&writer, str, size+1);
2844 if (str == NULL)
2845 return NULL;
2846 size = writer.allocated;
2847 }
2848 *str++ = (char) value;
2849 }
2850
2851 return _PyBytesWriter_Finish(&writer, str);
2852
2853 error:
2854 _PyBytesWriter_Dealloc(&writer);
2855 return NULL;
2856 }
2857
2858 PyObject *
PyBytes_FromObject(PyObject * x)2859 PyBytes_FromObject(PyObject *x)
2860 {
2861 PyObject *it, *result;
2862
2863 if (x == NULL) {
2864 PyErr_BadInternalCall();
2865 return NULL;
2866 }
2867
2868 if (PyBytes_CheckExact(x)) {
2869 Py_INCREF(x);
2870 return x;
2871 }
2872
2873 /* Use the modern buffer interface */
2874 if (PyObject_CheckBuffer(x))
2875 return _PyBytes_FromBuffer(x);
2876
2877 if (PyList_CheckExact(x))
2878 return _PyBytes_FromList(x);
2879
2880 if (PyTuple_CheckExact(x))
2881 return _PyBytes_FromTuple(x);
2882
2883 if (!PyUnicode_Check(x)) {
2884 it = PyObject_GetIter(x);
2885 if (it != NULL) {
2886 result = _PyBytes_FromIterator(it, x);
2887 Py_DECREF(it);
2888 return result;
2889 }
2890 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2891 return NULL;
2892 }
2893 }
2894
2895 PyErr_Format(PyExc_TypeError,
2896 "cannot convert '%.200s' object to bytes",
2897 Py_TYPE(x)->tp_name);
2898 return NULL;
2899 }
2900
2901 /* This allocator is needed for subclasses don't want to use __new__.
2902 * See https://github.com/python/cpython/issues/91020#issuecomment-1096793239
2903 *
2904 * This allocator will be removed when ob_shash is removed.
2905 */
2906 static PyObject *
bytes_alloc(PyTypeObject * self,Py_ssize_t nitems)2907 bytes_alloc(PyTypeObject *self, Py_ssize_t nitems)
2908 {
2909 PyBytesObject *obj = (PyBytesObject*)PyType_GenericAlloc(self, nitems);
2910 if (obj == NULL) {
2911 return NULL;
2912 }
2913 _Py_COMP_DIAG_PUSH
2914 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
2915 obj->ob_shash = -1;
2916 _Py_COMP_DIAG_POP
2917 return (PyObject*)obj;
2918 }
2919
2920 static PyObject *
bytes_subtype_new(PyTypeObject * type,PyObject * tmp)2921 bytes_subtype_new(PyTypeObject *type, PyObject *tmp)
2922 {
2923 PyObject *pnew;
2924 Py_ssize_t n;
2925
2926 assert(PyType_IsSubtype(type, &PyBytes_Type));
2927 assert(PyBytes_Check(tmp));
2928 n = PyBytes_GET_SIZE(tmp);
2929 pnew = type->tp_alloc(type, n);
2930 if (pnew != NULL) {
2931 memcpy(PyBytes_AS_STRING(pnew),
2932 PyBytes_AS_STRING(tmp), n+1);
2933 _Py_COMP_DIAG_PUSH
2934 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
2935 ((PyBytesObject *)pnew)->ob_shash =
2936 ((PyBytesObject *)tmp)->ob_shash;
2937 _Py_COMP_DIAG_POP
2938 }
2939 return pnew;
2940 }
2941
2942 PyDoc_STRVAR(bytes_doc,
2943 "bytes(iterable_of_ints) -> bytes\n\
2944 bytes(string, encoding[, errors]) -> bytes\n\
2945 bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
2946 bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2947 bytes() -> empty bytes object\n\
2948 \n\
2949 Construct an immutable array of bytes from:\n\
2950 - an iterable yielding integers in range(256)\n\
2951 - a text string encoded using the specified encoding\n\
2952 - any object implementing the buffer API.\n\
2953 - an integer");
2954
/* Forward declaration: bytes_iter() is defined after the iterator type
   further down, but is needed for the tp_iter slot below. */
static PyObject *bytes_iter(PyObject *seq);

/* Type object for bytes.  Slots are given positionally; a 0 leaves the slot
   unset.  Instances are variable-sized: the basic size is
   PyBytesObject_SIZE and each item is one char. */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                                    /* tp_name */
    PyBytesObject_SIZE,                         /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_vectorcall_offset */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_as_async */
    (reprfunc)bytes_repr,                       /* tp_repr */
    &bytes_as_number,                           /* tp_as_number */
    &bytes_as_sequence,                         /* tp_as_sequence */
    &bytes_as_mapping,                          /* tp_as_mapping */
    (hashfunc)bytes_hash,                       /* tp_hash */
    0,                                          /* tp_call */
    bytes_str,                                  /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &bytes_as_buffer,                           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_BYTES_SUBCLASS |
        _Py_TPFLAGS_MATCH_SELF,                 /* tp_flags */
    bytes_doc,                                  /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    bytes_iter,                                 /* tp_iter */
    0,                                          /* tp_iternext */
    bytes_methods,                              /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    bytes_alloc,                                /* tp_alloc */
    bytes_new,                                  /* tp_new */
    PyObject_Del,                               /* tp_free */
};
3000
3001 void
PyBytes_Concat(PyObject ** pv,PyObject * w)3002 PyBytes_Concat(PyObject **pv, PyObject *w)
3003 {
3004 assert(pv != NULL);
3005 if (*pv == NULL)
3006 return;
3007 if (w == NULL) {
3008 Py_CLEAR(*pv);
3009 return;
3010 }
3011
3012 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3013 /* Only one reference, so we can resize in place */
3014 Py_ssize_t oldsize;
3015 Py_buffer wb;
3016
3017 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
3018 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3019 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3020 Py_CLEAR(*pv);
3021 return;
3022 }
3023
3024 oldsize = PyBytes_GET_SIZE(*pv);
3025 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3026 PyErr_NoMemory();
3027 goto error;
3028 }
3029 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3030 goto error;
3031
3032 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3033 PyBuffer_Release(&wb);
3034 return;
3035
3036 error:
3037 PyBuffer_Release(&wb);
3038 Py_CLEAR(*pv);
3039 return;
3040 }
3041
3042 else {
3043 /* Multiple references, need to create new object */
3044 PyObject *v;
3045 v = bytes_concat(*pv, w);
3046 Py_SETREF(*pv, v);
3047 }
3048 }
3049
3050 void
PyBytes_ConcatAndDel(PyObject ** pv,PyObject * w)3051 PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
3052 {
3053 PyBytes_Concat(pv, w);
3054 Py_XDECREF(w);
3055 }
3056
3057
/* The following function breaks the notion that bytes are immutable:
   it changes the size of a bytes object. We get away with this only if there
   is only one module referencing the object. You can also think of it
   as creating a new bytes object and destroying the old one, only
   more efficiently. In any case, don't use this if the bytes object may
   already be known to some other part of the code...
   Note that if there's not enough memory to resize the bytes object, the
   original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
   memory" exception is set, and -1 is returned. Else (on success) 0 is
   returned, and the value in *pv may or may not be the same as on input.
   As always, an extra byte is allocated for a trailing \0 byte (newsize
   does *not* include that), and a trailing \0 byte is stored.

   Misuse -- *pv is not a bytes object, newsize is negative, or a non-empty
   object has a reference count other than 1 -- also sets *pv to NULL, drops
   the reference and returns -1, after calling PyErr_BadInternalCall().
*/

int
_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
{
    PyObject *v;
    PyBytesObject *sv;
    v = *pv;
    if (!PyBytes_Check(v) || newsize < 0) {
        goto error;
    }
    if (Py_SIZE(v) == newsize) {
        /* return early if newsize equals to v->ob_size */
        return 0;
    }
    if (Py_SIZE(v) == 0) {
        /* Growing an empty object: it is replaced by a fresh allocation
           instead of being reallocated.  (newsize cannot be 0 here, since
           equal sizes already returned above.) */
        if (newsize == 0) {
            return 0;
        }
        *pv = _PyBytes_FromSize(newsize, 0);
        Py_DECREF(v);
        return (*pv == NULL) ? -1 : 0;
    }
    if (Py_REFCNT(v) != 1) {
        goto error;
    }
    if (newsize == 0) {
        /* Shrinking to nothing: hand out the empty singleton. */
        *pv = bytes_new_empty();
        Py_DECREF(v);
        return 0;
    }
    /* XXX UNREF/NEWREF interface should be more symmetrical */
#ifdef Py_REF_DEBUG
    _Py_RefTotal--;
#endif
#ifdef Py_TRACE_REFS
    _Py_ForgetReference(v);
#endif
    *pv = (PyObject *)
        PyObject_Realloc(v, PyBytesObject_SIZE + newsize);
    if (*pv == NULL) {
        /* Realloc failed: the old block is released here, *pv stays NULL. */
        PyObject_Free(v);
        PyErr_NoMemory();
        return -1;
    }
    /* The block may have moved: set it up as a new reference, then fix up
       the size, terminator and cached hash. */
    _Py_NewReference(*pv);
    sv = (PyBytesObject *) *pv;
    Py_SET_SIZE(sv, newsize);
    sv->ob_sval[newsize] = '\0';
_Py_COMP_DIAG_PUSH
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
    sv->ob_shash = -1;          /* invalidate cached hash value */
_Py_COMP_DIAG_POP
    return 0;
error:
    *pv = 0;
    Py_DECREF(v);
    PyErr_BadInternalCall();
    return -1;
}
3130
3131
3132 PyStatus
_PyBytes_InitTypes(PyInterpreterState * interp)3133 _PyBytes_InitTypes(PyInterpreterState *interp)
3134 {
3135 if (!_Py_IsMainInterpreter(interp)) {
3136 return _PyStatus_OK();
3137 }
3138
3139 if (PyType_Ready(&PyBytes_Type) < 0) {
3140 return _PyStatus_ERR("Can't initialize bytes type");
3141 }
3142
3143 if (PyType_Ready(&PyBytesIter_Type) < 0) {
3144 return _PyStatus_ERR("Can't initialize bytes iterator type");
3145 }
3146
3147 return _PyStatus_OK();
3148 }
3149
3150
3151 /*********************** Bytes Iterator ****************************/
3152
/* State of a bytes iterator: the bytes object being walked (a reference
   owned by the iterator) and the index of the next byte to return. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;    /* index of the next byte to yield */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
3158
3159 static void
striter_dealloc(striterobject * it)3160 striter_dealloc(striterobject *it)
3161 {
3162 _PyObject_GC_UNTRACK(it);
3163 Py_XDECREF(it->it_seq);
3164 PyObject_GC_Del(it);
3165 }
3166
/* tp_traverse: the only object reference held by the iterator is it_seq.
   Py_VISIT relies on the parameters being named `visit` and `arg`. */
static int
striter_traverse(striterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
3173
3174 static PyObject *
striter_next(striterobject * it)3175 striter_next(striterobject *it)
3176 {
3177 PyBytesObject *seq;
3178
3179 assert(it != NULL);
3180 seq = it->it_seq;
3181 if (seq == NULL)
3182 return NULL;
3183 assert(PyBytes_Check(seq));
3184
3185 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3186 return _PyLong_FromUnsignedChar(
3187 (unsigned char)seq->ob_sval[it->it_index++]);
3188 }
3189
3190 it->it_seq = NULL;
3191 Py_DECREF(seq);
3192 return NULL;
3193 }
3194
3195 static PyObject *
striter_len(striterobject * it,PyObject * Py_UNUSED (ignored))3196 striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
3197 {
3198 Py_ssize_t len = 0;
3199 if (it->it_seq)
3200 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3201 return PyLong_FromSsize_t(len);
3202 }
3203
3204 PyDoc_STRVAR(length_hint_doc,
3205 "Private method returning an estimate of len(list(it)).");
3206
3207 static PyObject *
striter_reduce(striterobject * it,PyObject * Py_UNUSED (ignored))3208 striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
3209 {
3210 PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));
3211
3212 /* _PyEval_GetBuiltin can invoke arbitrary code,
3213 * call must be before access of iterator pointers.
3214 * see issue #101765 */
3215
3216 if (it->it_seq != NULL) {
3217 return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
3218 } else {
3219 return Py_BuildValue("N(())", iter);
3220 }
3221 }
3222
3223 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3224
3225 static PyObject *
striter_setstate(striterobject * it,PyObject * state)3226 striter_setstate(striterobject *it, PyObject *state)
3227 {
3228 Py_ssize_t index = PyLong_AsSsize_t(state);
3229 if (index == -1 && PyErr_Occurred())
3230 return NULL;
3231 if (it->it_seq != NULL) {
3232 if (index < 0)
3233 index = 0;
3234 else if (index > PyBytes_GET_SIZE(it->it_seq))
3235 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3236 it->it_index = index;
3237 }
3238 Py_RETURN_NONE;
3239 }
3240
3241 PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3242
/* Methods of the bytes iterator: the length hint plus the two pickling
   hooks.  Terminated by a {NULL, NULL} sentinel. */
static PyMethodDef striter_methods[] = {
    {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
     length_hint_doc},
    {"__reduce__",      (PyCFunction)striter_reduce, METH_NOARGS,
     reduce_doc},
    {"__setstate__",    (PyCFunction)striter_setstate, METH_O,
     setstate_doc},
    {NULL,              NULL}           /* sentinel */
};
3252
/* Type object for the iterator returned by bytes_iter().  It is a
   fixed-size, GC-enabled object whose tp_iter returns the iterator itself.
   Slots after the last one listed are left zero-initialized. */
PyTypeObject PyBytesIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes_iterator",                           /* tp_name */
    sizeof(striterobject),                      /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc,                /* tp_dealloc */
    0,                                          /* tp_vectorcall_offset */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_as_async */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)striter_traverse,     /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)striter_next,                 /* tp_iternext */
    striter_methods,                            /* tp_methods */
    0,                                          /* tp_members */
};
3285
3286 static PyObject *
bytes_iter(PyObject * seq)3287 bytes_iter(PyObject *seq)
3288 {
3289 striterobject *it;
3290
3291 if (!PyBytes_Check(seq)) {
3292 PyErr_BadInternalCall();
3293 return NULL;
3294 }
3295 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3296 if (it == NULL)
3297 return NULL;
3298 it->it_index = 0;
3299 Py_INCREF(seq);
3300 it->it_seq = (PyBytesObject *)seq;
3301 _PyObject_GC_TRACK(it);
3302 return (PyObject *)it;
3303 }
3304
3305
3306 /* _PyBytesWriter API */
3307
/* OVERALLOCATE_FACTOR is a divisor: when overallocation is enabled,
   _PyBytesWriter_Resize() adds (requested size / OVERALLOCATE_FACTOR)
   extra bytes to the allocation. */
#ifdef MS_WINDOWS
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3315
3316 void
_PyBytesWriter_Init(_PyBytesWriter * writer)3317 _PyBytesWriter_Init(_PyBytesWriter *writer)
3318 {
3319 /* Set all attributes before small_buffer to 0 */
3320 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
3321 #ifndef NDEBUG
3322 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3323 sizeof(writer->small_buffer));
3324 #endif
3325 }
3326
3327 void
_PyBytesWriter_Dealloc(_PyBytesWriter * writer)3328 _PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3329 {
3330 Py_CLEAR(writer->buffer);
3331 }
3332
3333 Py_LOCAL_INLINE(char*)
_PyBytesWriter_AsString(_PyBytesWriter * writer)3334 _PyBytesWriter_AsString(_PyBytesWriter *writer)
3335 {
3336 if (writer->use_small_buffer) {
3337 assert(writer->buffer == NULL);
3338 return writer->small_buffer;
3339 }
3340 else if (writer->use_bytearray) {
3341 assert(writer->buffer != NULL);
3342 return PyByteArray_AS_STRING(writer->buffer);
3343 }
3344 else {
3345 assert(writer->buffer != NULL);
3346 return PyBytes_AS_STRING(writer->buffer);
3347 }
3348 }
3349
3350 Py_LOCAL_INLINE(Py_ssize_t)
_PyBytesWriter_GetSize(_PyBytesWriter * writer,char * str)3351 _PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
3352 {
3353 const char *start = _PyBytesWriter_AsString(writer);
3354 assert(str != NULL);
3355 assert(str >= start);
3356 assert(str - start <= writer->allocated);
3357 return str - start;
3358 }
3359
#ifndef NDEBUG
/* Debug-only sanity check of a writer and a write position `str` into it.
   Every invariant is enforced with assert(); the function returns 1 so that
   it can itself be wrapped in assert() by callers. */
Py_LOCAL_INLINE(int)
_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
{
    const char *start, *end;

    /* Exactly one storage is in use: the small buffer, or a uniquely
       referenced bytes/bytearray object of the exact expected type. */
    if (writer->use_small_buffer) {
        assert(writer->buffer == NULL);
    }
    else {
        assert(writer->buffer != NULL);
        if (writer->use_bytearray)
            assert(PyByteArray_CheckExact(writer->buffer));
        else
            assert(PyBytes_CheckExact(writer->buffer));
        assert(Py_REFCNT(writer->buffer) == 1);
    }

    if (writer->use_bytearray) {
        /* bytearray has its own overallocation algorithm,
           writer overallocation must be disabled */
        assert(!writer->overallocate);
    }

    assert(0 <= writer->allocated);
    assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
    /* the last byte must always be null */
    start = _PyBytesWriter_AsString(writer);
    assert(start[writer->allocated] == 0);

    /* The write position must lie inside [start, start + allocated]. */
    end = start + writer->allocated;
    assert(str != NULL);
    assert(start <= str && str <= end);
    return 1;
}
#endif
3396
/* Grow the writer's storage so that it can hold `size` bytes (plus extra
   room when writer->overallocate is set).

   `str` is the current write position.  Because the storage may move, the
   equivalent position in the new storage is returned and must replace the
   caller's pointer.  `size` must be larger than the current allocation
   (asserted).  On failure the writer is deallocated and NULL is returned. */
void*
_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
{
    Py_ssize_t allocated, pos;

    assert(_PyBytesWriter_CheckConsistency(writer, str));
    assert(writer->allocated < size);

    allocated = size;
    /* The extra room is only added if doing so cannot overflow. */
    if (writer->overallocate
        && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
        /* overallocate to limit the number of realloc() */
        allocated += allocated / OVERALLOCATE_FACTOR;
    }

    /* Remember the write offset before the storage is touched. */
    pos = _PyBytesWriter_GetSize(writer, str);
    if (!writer->use_small_buffer) {
        if (writer->use_bytearray) {
            if (PyByteArray_Resize(writer->buffer, allocated))
                goto error;
            /* writer->allocated can be smaller than writer->buffer->ob_alloc,
               but we cannot use ob_alloc because bytes may need to be moved
               to use the whole buffer. bytearray uses an internal optimization
               to avoid moving or copying bytes when bytes are removed at the
               beginning (ex: del bytearray[:1]). */
        }
        else {
            if (_PyBytes_Resize(&writer->buffer, allocated))
                goto error;
        }
    }
    else {
        /* convert from stack buffer to bytes object buffer */
        assert(writer->buffer == NULL);

        if (writer->use_bytearray)
            writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
        else
            writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
        if (writer->buffer == NULL)
            goto error;

        /* Carry over whatever has been written to the small buffer so far. */
        if (pos != 0) {
            char *dest;
            if (writer->use_bytearray)
                dest = PyByteArray_AS_STRING(writer->buffer);
            else
                dest = PyBytes_AS_STRING(writer->buffer);
            memcpy(dest,
                   writer->small_buffer,
                   pos);
        }

        writer->use_small_buffer = 0;
#ifndef NDEBUG
        memset(writer->small_buffer, PYMEM_CLEANBYTE,
               sizeof(writer->small_buffer));
#endif
    }
    writer->allocated = allocated;

    /* Re-derive the write position inside the (possibly moved) storage. */
    str = _PyBytesWriter_AsString(writer) + pos;
    assert(_PyBytesWriter_CheckConsistency(writer, str));
    return str;

error:
    _PyBytesWriter_Dealloc(writer);
    return NULL;
}
3466
3467 void*
_PyBytesWriter_Prepare(_PyBytesWriter * writer,void * str,Py_ssize_t size)3468 _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3469 {
3470 Py_ssize_t new_min_size;
3471
3472 assert(_PyBytesWriter_CheckConsistency(writer, str));
3473 assert(size >= 0);
3474
3475 if (size == 0) {
3476 /* nothing to do */
3477 return str;
3478 }
3479
3480 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3481 PyErr_NoMemory();
3482 _PyBytesWriter_Dealloc(writer);
3483 return NULL;
3484 }
3485 new_min_size = writer->min_size + size;
3486
3487 if (new_min_size > writer->allocated)
3488 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3489
3490 writer->min_size = new_min_size;
3491 return str;
3492 }
3493
3494 /* Allocate the buffer to write size bytes.
3495 Return the pointer to the beginning of buffer data.
3496 Raise an exception and return NULL on error. */
3497 void*
_PyBytesWriter_Alloc(_PyBytesWriter * writer,Py_ssize_t size)3498 _PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3499 {
3500 /* ensure that _PyBytesWriter_Alloc() is only called once */
3501 assert(writer->min_size == 0 && writer->buffer == NULL);
3502 assert(size >= 0);
3503
3504 writer->use_small_buffer = 1;
3505 #ifndef NDEBUG
3506 writer->allocated = sizeof(writer->small_buffer) - 1;
3507 /* In debug mode, don't use the full small buffer because it is less
3508 efficient than bytes and bytearray objects to detect buffer underflow
3509 and buffer overflow. Use 10 bytes of the small buffer to test also
3510 code using the smaller buffer in debug mode.
3511
3512 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3513 in debug mode to also be able to detect stack overflow when running
3514 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3515 if _Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3516 stack overflow. */
3517 writer->allocated = Py_MIN(writer->allocated, 10);
3518 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3519 to detect buffer overflow */
3520 writer->small_buffer[writer->allocated] = 0;
3521 #else
3522 writer->allocated = sizeof(writer->small_buffer);
3523 #endif
3524 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
3525 }
3526
3527 PyObject *
_PyBytesWriter_Finish(_PyBytesWriter * writer,void * str)3528 _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
3529 {
3530 Py_ssize_t size;
3531 PyObject *result;
3532
3533 assert(_PyBytesWriter_CheckConsistency(writer, str));
3534
3535 size = _PyBytesWriter_GetSize(writer, str);
3536 if (size == 0 && !writer->use_bytearray) {
3537 Py_CLEAR(writer->buffer);
3538 /* Get the empty byte string singleton */
3539 result = PyBytes_FromStringAndSize(NULL, 0);
3540 }
3541 else if (writer->use_small_buffer) {
3542 if (writer->use_bytearray) {
3543 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3544 }
3545 else {
3546 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3547 }
3548 }
3549 else {
3550 result = writer->buffer;
3551 writer->buffer = NULL;
3552
3553 if (size != writer->allocated) {
3554 if (writer->use_bytearray) {
3555 if (PyByteArray_Resize(result, size)) {
3556 Py_DECREF(result);
3557 return NULL;
3558 }
3559 }
3560 else {
3561 if (_PyBytes_Resize(&result, size)) {
3562 assert(result == NULL);
3563 return NULL;
3564 }
3565 }
3566 }
3567 }
3568 return result;
3569 }
3570
3571 void*
_PyBytesWriter_WriteBytes(_PyBytesWriter * writer,void * ptr,const void * bytes,Py_ssize_t size)3572 _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
3573 const void *bytes, Py_ssize_t size)
3574 {
3575 char *str = (char *)ptr;
3576
3577 str = _PyBytesWriter_Prepare(writer, str, size);
3578 if (str == NULL)
3579 return NULL;
3580
3581 memcpy(str, bytes, size);
3582 str += size;
3583
3584 return str;
3585 }
3586
3587
3588 void
_PyBytes_Repeat(char * dest,Py_ssize_t len_dest,const char * src,Py_ssize_t len_src)3589 _PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
3590 const char* src, Py_ssize_t len_src)
3591 {
3592 if (len_dest == 0) {
3593 return;
3594 }
3595 if (len_src == 1) {
3596 memset(dest, src[0], len_dest);
3597 }
3598 else {
3599 if (src != dest) {
3600 memcpy(dest, src, len_src);
3601 }
3602 Py_ssize_t copied = len_src;
3603 while (copied < len_dest) {
3604 Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied);
3605 memcpy(dest + copied, dest, bytes_to_copy);
3606 copied += bytes_to_copy;
3607 }
3608 }
3609 }
3610
3611