1 /* bytes object implementation */
2
3 #define PY_SSIZE_T_CLEAN
4
5 #include "Python.h"
6 #include "pycore_abstract.h" // _PyIndex_Check()
7 #include "pycore_bytes_methods.h" // _Py_bytes_startswith()
8 #include "pycore_format.h" // F_LJUST
9 #include "pycore_initconfig.h" // _PyStatus_OK()
10 #include "pycore_object.h" // _PyObject_GC_TRACK
11 #include "pycore_pymem.h" // PYMEM_CLEANBYTE
12
13 #include "pystrhex.h"
14 #include <stddef.h>
15
16 /*[clinic input]
17 class bytes "PyBytesObject *" "&PyBytes_Type"
18 [clinic start generated code]*/
19 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
20
21 #include "clinic/bytesobject.c.h"
22
23 _Py_IDENTIFIER(__bytes__);
24
25 /* PyBytesObject_SIZE gives the basic size of a bytes object; any memory allocation
26 for a bytes object of length n should request PyBytesObject_SIZE + n bytes.
27
28 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
29 3 or 7 bytes per bytes object allocation on a typical system.
30 */
31 #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
32
33 /* Forward declaration */
34 Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
35 char *str);
36
37
38 static struct _Py_bytes_state*
get_bytes_state(void)39 get_bytes_state(void)
40 {
41 PyInterpreterState *interp = _PyInterpreterState_GET();
42 return &interp->bytes;
43 }
44
45
46 // Return a borrowed reference to the empty bytes string singleton.
bytes_get_empty(void)47 static inline PyObject* bytes_get_empty(void)
48 {
49 struct _Py_bytes_state *state = get_bytes_state();
50 // bytes_get_empty() must not be called before _PyBytes_Init()
51 // or after _PyBytes_Fini()
52 assert(state->empty_string != NULL);
53 return state->empty_string;
54 }
55
56
57 // Return a strong reference to the empty bytes string singleton.
bytes_new_empty(void)58 static inline PyObject* bytes_new_empty(void)
59 {
60 PyObject *empty = bytes_get_empty();
61 Py_INCREF(empty);
62 return (PyObject *)empty;
63 }
64
65
66 static int
bytes_create_empty_string_singleton(struct _Py_bytes_state * state)67 bytes_create_empty_string_singleton(struct _Py_bytes_state *state)
68 {
69 // Create the empty bytes string singleton
70 PyBytesObject *op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE);
71 if (op == NULL) {
72 return -1;
73 }
74 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, 0);
75 op->ob_shash = -1;
76 op->ob_sval[0] = '\0';
77
78 assert(state->empty_string == NULL);
79 state->empty_string = (PyObject *)op;
80 return 0;
81 }
82
83
84 /*
85 For PyBytes_FromString(), the parameter `str' points to a null-terminated
86 string containing exactly `size' bytes.
87
88 For PyBytes_FromStringAndSize(), the parameter `str' is
89 either NULL or else points to a string containing at least `size' bytes.
90 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
91 not have to be null-terminated. (Therefore it is safe to construct a
92 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
93 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
94 bytes (setting the last byte to the null terminating character) and you can
95 fill in the data yourself. If `str' is non-NULL then the resulting
96 PyBytes object must be treated as immutable and you must not fill in nor
97 alter the data yourself, since the strings may be shared.
98
99 The PyObject member `op->ob_size', which denotes the number of "extra
100 items" in a variable-size object, will contain the number of bytes
101 allocated for string data, not counting the null terminating character.
102 It is therefore equal to the `size' parameter (for
103 PyBytes_FromStringAndSize()) or the length of the string in the `str'
104 parameter (for PyBytes_FromString()).
105 */
106 static PyObject *
_PyBytes_FromSize(Py_ssize_t size,int use_calloc)107 _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
108 {
109 PyBytesObject *op;
110 assert(size >= 0);
111
112 if (size == 0) {
113 return bytes_new_empty();
114 }
115
116 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
117 PyErr_SetString(PyExc_OverflowError,
118 "byte string is too large");
119 return NULL;
120 }
121
122 /* Inline PyObject_NewVar */
123 if (use_calloc)
124 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
125 else
126 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
127 if (op == NULL) {
128 return PyErr_NoMemory();
129 }
130 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
131 op->ob_shash = -1;
132 if (!use_calloc) {
133 op->ob_sval[size] = '\0';
134 }
135 return (PyObject *) op;
136 }
137
138 PyObject *
PyBytes_FromStringAndSize(const char * str,Py_ssize_t size)139 PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
140 {
141 PyBytesObject *op;
142 if (size < 0) {
143 PyErr_SetString(PyExc_SystemError,
144 "Negative size passed to PyBytes_FromStringAndSize");
145 return NULL;
146 }
147 if (size == 1 && str != NULL) {
148 struct _Py_bytes_state *state = get_bytes_state();
149 op = state->characters[*str & UCHAR_MAX];
150 if (op != NULL) {
151 Py_INCREF(op);
152 return (PyObject *)op;
153 }
154 }
155 if (size == 0) {
156 return bytes_new_empty();
157 }
158
159 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
160 if (op == NULL)
161 return NULL;
162 if (str == NULL)
163 return (PyObject *) op;
164
165 memcpy(op->ob_sval, str, size);
166 /* share short strings */
167 if (size == 1) {
168 struct _Py_bytes_state *state = get_bytes_state();
169 Py_INCREF(op);
170 state->characters[*str & UCHAR_MAX] = op;
171 }
172 return (PyObject *) op;
173 }
174
175 PyObject *
PyBytes_FromString(const char * str)176 PyBytes_FromString(const char *str)
177 {
178 size_t size;
179 PyBytesObject *op;
180
181 assert(str != NULL);
182 size = strlen(str);
183 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
184 PyErr_SetString(PyExc_OverflowError,
185 "byte string is too long");
186 return NULL;
187 }
188
189 struct _Py_bytes_state *state = get_bytes_state();
190 if (size == 0) {
191 return bytes_new_empty();
192 }
193 else if (size == 1) {
194 op = state->characters[*str & UCHAR_MAX];
195 if (op != NULL) {
196 Py_INCREF(op);
197 return (PyObject *)op;
198 }
199 }
200
201 /* Inline PyObject_NewVar */
202 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
203 if (op == NULL) {
204 return PyErr_NoMemory();
205 }
206 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
207 op->ob_shash = -1;
208 memcpy(op->ob_sval, str, size+1);
209 /* share short strings */
210 if (size == 1) {
211 assert(state->characters[*str & UCHAR_MAX] == NULL);
212 Py_INCREF(op);
213 state->characters[*str & UCHAR_MAX] = op;
214 }
215 return (PyObject *) op;
216 }
217
/* Build a bytes object from a C printf-style `format` and a va_list.

   Conversions handled by the switch below: %c, %d, %u, %i, %x, %s, %p, %%.
   The 'l' and 'z' length flags are recognised only in front of 'd' and 'u'.
   A width is parsed and ignored; a precision is only used by %s.
   An unrecognised conversion stops formatting: the rest of the format string
   (starting at its '%') is copied verbatim and the result is returned.
   Returns NULL on failure (%c out of range, or a writer call failing). */
218 PyObject *
PyBytes_FromFormatV(const char * format,va_list vargs)219 PyBytes_FromFormatV(const char *format, va_list vargs)
220 {
221 char *s;
222 const char *f;
223 const char *p;
224 Py_ssize_t prec;
225 int longflag;
226 int size_tflag;
227 /* Longest 64-bit formatted numbers:
228 - "18446744073709551615\0" (21 bytes)
229 - "-9223372036854775808\0" (21 bytes)
230 Decimal takes the most space (it isn't enough for octal.)
231
232 Longest 64-bit pointer representation:
233 "0xffffffffffffffff\0" (19 bytes). */
234 char buffer[21];
235 _PyBytesWriter writer;
236
237 _PyBytesWriter_Init(&writer);
238
/* Preallocate one output byte per format byte; each conversion below
   adjusts writer.min_size to give back the bytes of its own specifier. */
239 s = _PyBytesWriter_Alloc(&writer, strlen(format));
240 if (s == NULL)
241 return NULL;
242 writer.overallocate = 1;
243
/* Append the NUL-terminated string `str` to the writer; jumps to `error`
   if the write fails. */
244 #define WRITE_BYTES(str) \
245 do { \
246 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
247 if (s == NULL) \
248 goto error; \
249 } while (0)
250
251 for (f = format; *f; f++) {
/* Literal byte: copy it straight through. */
252 if (*f != '%') {
253 *s++ = *f;
254 continue;
255 }
256
/* p remembers where the '%' of this specifier starts. */
257 p = f++;
258
259 /* ignore the width (ex: 10 in "%10s") */
260 while (Py_ISDIGIT(*f))
261 f++;
262
263 /* parse the precision (ex: 10 in "%.10s") */
264 prec = 0;
265 if (*f == '.') {
266 f++;
267 for (; Py_ISDIGIT(*f); f++) {
268 prec = (prec * 10) + (*f - '0');
269 }
270 }
271
/* Skip anything else that is neither a letter, a '%' nor the end. */
272 while (*f && *f != '%' && !Py_ISALPHA(*f))
273 f++;
274
275 /* handle the long flag ('l'), but only for %ld and %lu.
276 others can be added when necessary. */
277 longflag = 0;
278 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
279 longflag = 1;
280 ++f;
281 }
282
283 /* handle the size_t flag ('z'). */
284 size_tflag = 0;
285 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
286 size_tflag = 1;
287 ++f;
288 }
289
290 /* subtract bytes preallocated for the format string
291 (ex: 2 for "%s") */
292 writer.min_size -= (f - p + 1);
293
294 switch (*f) {
295 case 'c':
296 {
297 int c = va_arg(vargs, int);
298 if (c < 0 || c > 255) {
299 PyErr_SetString(PyExc_OverflowError,
300 "PyBytes_FromFormatV(): %c format "
301 "expects an integer in range [0; 255]");
302 goto error;
303 }
/* One byte is written directly, so re-add it to the minimum size. */
304 writer.min_size++;
305 *s++ = (unsigned char)c;
306 break;
307 }
308
/* Integer conversions are rendered into `buffer` with sprintf() and then
   appended through WRITE_BYTES(). */
309 case 'd':
310 if (longflag) {
311 sprintf(buffer, "%ld", va_arg(vargs, long));
312 }
313 else if (size_tflag) {
314 sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
315 }
316 else {
317 sprintf(buffer, "%d", va_arg(vargs, int));
318 }
319 assert(strlen(buffer) < sizeof(buffer));
320 WRITE_BYTES(buffer);
321 break;
322
323 case 'u':
324 if (longflag) {
325 sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
326 }
327 else if (size_tflag) {
328 sprintf(buffer, "%zu", va_arg(vargs, size_t));
329 }
330 else {
331 sprintf(buffer, "%u", va_arg(vargs, unsigned int));
332 }
333 assert(strlen(buffer) < sizeof(buffer));
334 WRITE_BYTES(buffer);
335 break;
336
337 case 'i':
338 sprintf(buffer, "%i", va_arg(vargs, int));
339 assert(strlen(buffer) < sizeof(buffer));
340 WRITE_BYTES(buffer);
341 break;
342
343 case 'x':
344 sprintf(buffer, "%x", va_arg(vargs, int));
345 assert(strlen(buffer) < sizeof(buffer));
346 WRITE_BYTES(buffer);
347 break;
348
349 case 's':
350 {
351 Py_ssize_t i;
352
/* p is reused here to hold the string argument. A precision <= 0 (which
   includes "no precision given", since prec starts at 0) copies the whole
   string; otherwise at most `prec` bytes are copied. */
353 p = va_arg(vargs, const char*);
354 if (prec <= 0) {
355 i = strlen(p);
356 }
357 else {
358 i = 0;
359 while (i < prec && p[i]) {
360 i++;
361 }
362 }
363 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
364 if (s == NULL)
365 goto error;
366 break;
367 }
368
369 case 'p':
370 sprintf(buffer, "%p", va_arg(vargs, void*));
371 assert(strlen(buffer) < sizeof(buffer));
372 /* %p is ill-defined: ensure leading 0x. */
373 if (buffer[1] == 'X')
374 buffer[1] = 'x';
375 else if (buffer[1] != 'x') {
376 memmove(buffer+2, buffer, strlen(buffer)+1);
377 buffer[0] = '0';
378 buffer[1] = 'x';
379 }
380 WRITE_BYTES(buffer);
381 break;
382
383 case '%':
384 writer.min_size++;
385 *s++ = '%';
386 break;
387
388 default:
389 if (*f == 0) {
390 /* fix min_size if we reached the end of the format string */
391 writer.min_size++;
392 }
393
394 /* invalid format string: copy unformatted string and exit */
395 WRITE_BYTES(p);
396 return _PyBytesWriter_Finish(&writer, s);
397 }
398 }
399
400 #undef WRITE_BYTES
401
402 return _PyBytesWriter_Finish(&writer, s);
403
/* Failure path: release the writer's buffer before reporting the error. */
404 error:
405 _PyBytesWriter_Dealloc(&writer);
406 return NULL;
407 }
408
409 PyObject *
PyBytes_FromFormat(const char * format,...)410 PyBytes_FromFormat(const char *format, ...)
411 {
412 PyObject* ret;
413 va_list vargs;
414
415 #ifdef HAVE_STDARG_PROTOTYPES
416 va_start(vargs, format);
417 #else
418 va_start(vargs);
419 #endif
420 ret = PyBytes_FromFormatV(format, vargs);
421 va_end(vargs);
422 return ret;
423 }
424
425 /* Helpers for formatstring */
426
427 Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject * args,Py_ssize_t arglen,Py_ssize_t * p_argidx)428 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
429 {
430 Py_ssize_t argidx = *p_argidx;
431 if (argidx < arglen) {
432 (*p_argidx)++;
433 if (arglen < 0)
434 return args;
435 else
436 return PyTuple_GetItem(args, argidx);
437 }
438 PyErr_SetString(PyExc_TypeError,
439 "not enough arguments for format string");
440 return NULL;
441 }
442
443 /* Returns a new reference to a PyBytes object, or NULL on failure. */
444
445 static char*
formatfloat(PyObject * v,int flags,int prec,int type,PyObject ** p_result,_PyBytesWriter * writer,char * str)446 formatfloat(PyObject *v, int flags, int prec, int type,
447 PyObject **p_result, _PyBytesWriter *writer, char *str)
448 {
449 char *p;
450 PyObject *result;
451 double x;
452 size_t len;
453
454 x = PyFloat_AsDouble(v);
455 if (x == -1.0 && PyErr_Occurred()) {
456 PyErr_Format(PyExc_TypeError, "float argument required, "
457 "not %.200s", Py_TYPE(v)->tp_name);
458 return NULL;
459 }
460
461 if (prec < 0)
462 prec = 6;
463
464 p = PyOS_double_to_string(x, type, prec,
465 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
466
467 if (p == NULL)
468 return NULL;
469
470 len = strlen(p);
471 if (writer != NULL) {
472 str = _PyBytesWriter_Prepare(writer, str, len);
473 if (str == NULL)
474 return NULL;
475 memcpy(str, p, len);
476 PyMem_Free(p);
477 str += len;
478 return str;
479 }
480
481 result = PyBytes_FromStringAndSize(p, len);
482 PyMem_Free(p);
483 *p_result = result;
484 return result != NULL ? str : NULL;
485 }
486
487 static PyObject *
formatlong(PyObject * v,int flags,int prec,int type)488 formatlong(PyObject *v, int flags, int prec, int type)
489 {
490 PyObject *result, *iobj;
491 if (type == 'i')
492 type = 'd';
493 if (PyLong_Check(v))
494 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
495 if (PyNumber_Check(v)) {
496 /* make sure number is a type of integer for o, x, and X */
497 if (type == 'o' || type == 'x' || type == 'X')
498 iobj = _PyNumber_Index(v);
499 else
500 iobj = PyNumber_Long(v);
501 if (iobj != NULL) {
502 assert(PyLong_Check(iobj));
503 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
504 Py_DECREF(iobj);
505 return result;
506 }
507 if (!PyErr_ExceptionMatches(PyExc_TypeError))
508 return NULL;
509 }
510 PyErr_Format(PyExc_TypeError,
511 "%%%c format: %s is required, not %.200s", type,
512 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
513 : "a real number",
514 Py_TYPE(v)->tp_name);
515 return NULL;
516 }
517
518 static int
byte_converter(PyObject * arg,char * p)519 byte_converter(PyObject *arg, char *p)
520 {
521 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
522 *p = PyBytes_AS_STRING(arg)[0];
523 return 1;
524 }
525 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
526 *p = PyByteArray_AS_STRING(arg)[0];
527 return 1;
528 }
529 else {
530 int overflow;
531 long ival = PyLong_AsLongAndOverflow(arg, &overflow);
532 if (ival == -1 && PyErr_Occurred()) {
533 if (PyErr_ExceptionMatches(PyExc_TypeError)) {
534 goto onError;
535 }
536 return 0;
537 }
538 if (!(0 <= ival && ival <= 255)) {
539 /* this includes an overflow in converting to C long */
540 PyErr_SetString(PyExc_OverflowError,
541 "%c arg not in range(256)");
542 return 0;
543 }
544 *p = (char)ival;
545 return 1;
546 }
547 onError:
548 PyErr_SetString(PyExc_TypeError,
549 "%c requires an integer in range(256) or a single byte");
550 return 0;
551 }
552
553 static PyObject *_PyBytes_FromBuffer(PyObject *x);
554
555 static PyObject *
format_obj(PyObject * v,const char ** pbuf,Py_ssize_t * plen)556 format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
557 {
558 PyObject *func, *result;
559 /* is it a bytes object? */
560 if (PyBytes_Check(v)) {
561 *pbuf = PyBytes_AS_STRING(v);
562 *plen = PyBytes_GET_SIZE(v);
563 Py_INCREF(v);
564 return v;
565 }
566 if (PyByteArray_Check(v)) {
567 *pbuf = PyByteArray_AS_STRING(v);
568 *plen = PyByteArray_GET_SIZE(v);
569 Py_INCREF(v);
570 return v;
571 }
572 /* does it support __bytes__? */
573 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
574 if (func != NULL) {
575 result = _PyObject_CallNoArg(func);
576 Py_DECREF(func);
577 if (result == NULL)
578 return NULL;
579 if (!PyBytes_Check(result)) {
580 PyErr_Format(PyExc_TypeError,
581 "__bytes__ returned non-bytes (type %.200s)",
582 Py_TYPE(result)->tp_name);
583 Py_DECREF(result);
584 return NULL;
585 }
586 *pbuf = PyBytes_AS_STRING(result);
587 *plen = PyBytes_GET_SIZE(result);
588 return result;
589 }
590 /* does it support buffer protocol? */
591 if (PyObject_CheckBuffer(v)) {
592 /* maybe we can avoid making a copy of the buffer object here? */
593 result = _PyBytes_FromBuffer(v);
594 if (result == NULL)
595 return NULL;
596 *pbuf = PyBytes_AS_STRING(result);
597 *plen = PyBytes_GET_SIZE(result);
598 return result;
599 }
600 PyErr_Format(PyExc_TypeError,
601 "%%b requires a bytes-like object, "
602 "or an object that implements __bytes__, not '%.100s'",
603 Py_TYPE(v)->tp_name);
604 return NULL;
605 }
606
607 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
608
/* Apply printf-style formatting: interpret the `format_len` bytes at
   `format` as a format string and substitute values taken from `args`.

   `args` may be a tuple (positional arguments), a mapping (used for
   "%(key)" lookups) or any other single object (one argument).
   `use_bytearray` is copied into the writer's `use_bytearray` field and
   turns off overallocation (presumably selecting a bytearray result;
   confirm against _PyBytesWriter).

   Returns the object produced by _PyBytesWriter_Finish(), or NULL with an
   exception set. NULL `args` is reported through PyErr_BadInternalCall(). */
609 PyObject *
_PyBytes_FormatEx(const char * format,Py_ssize_t format_len,PyObject * args,int use_bytearray)610 _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
611 PyObject *args, int use_bytearray)
612 {
613 const char *fmt;
614 char *res;
615 Py_ssize_t arglen, argidx;
616 Py_ssize_t fmtcnt;
617 int args_owned = 0;
618 PyObject *dict = NULL;
619 _PyBytesWriter writer;
620
621 if (args == NULL) {
622 PyErr_BadInternalCall();
623 return NULL;
624 }
625 fmt = format;
626 fmtcnt = format_len;
627
628 _PyBytesWriter_Init(&writer);
629 writer.use_bytearray = use_bytearray;
630
/* Preallocate one output byte per format byte; conversions that need more
   call _PyBytesWriter_Prepare() below. */
631 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
632 if (res == NULL)
633 return NULL;
634 if (!use_bytearray)
635 writer.overallocate = 1;
636
/* arglen == -1 / argidx == -2 mark "args is one single value":
   getnextarg() then hands out args itself exactly once. */
637 if (PyTuple_Check(args)) {
638 arglen = PyTuple_GET_SIZE(args);
639 argidx = 0;
640 }
641 else {
642 arglen = -1;
643 argidx = -2;
644 }
/* Anything subscriptable that is not a tuple, bytes, str or bytearray is
   treated as the mapping for "%(key)" lookups. */
645 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
646 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
647 !PyByteArray_Check(args)) {
648 dict = args;
649 }
650
/* fmtcnt is decremented before each byte is examined, so inside the loop
   it counts the bytes that remain after the current one. */
651 while (--fmtcnt >= 0) {
652 if (*fmt != '%') {
/* Literal run: copy everything up to the next '%' (or the end) at once. */
653 Py_ssize_t len;
654 char *pos;
655
656 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
657 if (pos != NULL)
658 len = pos - fmt;
659 else
660 len = fmtcnt + 1;
661 assert(len != 0);
662
663 memcpy(res, fmt, len);
664 res += len;
665 fmt += len;
666 fmtcnt -= (len - 1);
667 }
668 else {
669 /* Got a format specifier */
670 int flags = 0;
671 Py_ssize_t width = -1;
672 int prec = -1;
673 int c = '\0';
674 int fill;
675 PyObject *v = NULL;
676 PyObject *temp = NULL;
677 const char *pbuf = NULL;
678 int sign;
679 Py_ssize_t len = 0;
680 char onechar; /* For byte_converter() */
681 Py_ssize_t alloc;
682
683 fmt++;
/* "%%" produces a single literal '%'. */
684 if (*fmt == '%') {
685 *res++ = '%';
686 fmt++;
687 fmtcnt--;
688 continue;
689 }
/* "%(key)": look the key up in the mapping; the value found becomes the
   single argument for this conversion. */
690 if (*fmt == '(') {
691 const char *keystart;
692 Py_ssize_t keylen;
693 PyObject *key;
694 int pcount = 1;
695
696 if (dict == NULL) {
697 PyErr_SetString(PyExc_TypeError,
698 "format requires a mapping");
699 goto error;
700 }
701 ++fmt;
702 --fmtcnt;
703 keystart = fmt;
704 /* Skip over balanced parentheses */
705 while (pcount > 0 && --fmtcnt >= 0) {
706 if (*fmt == ')')
707 --pcount;
708 else if (*fmt == '(')
709 ++pcount;
710 fmt++;
711 }
712 keylen = fmt - keystart - 1;
713 if (fmtcnt < 0 || pcount > 0) {
714 PyErr_SetString(PyExc_ValueError,
715 "incomplete format key");
716 goto error;
717 }
718 key = PyBytes_FromStringAndSize(keystart,
719 keylen);
720 if (key == NULL)
721 goto error;
/* Drop the value fetched for a previous "%(key)" before replacing it. */
722 if (args_owned) {
723 Py_DECREF(args);
724 args_owned = 0;
725 }
726 args = PyObject_GetItem(dict, key);
727 Py_DECREF(key);
728 if (args == NULL) {
729 goto error;
730 }
731 args_owned = 1;
732 arglen = -1;
733 argidx = -2;
734 }
735
736 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
737 while (--fmtcnt >= 0) {
738 switch (c = *fmt++) {
739 case '-': flags |= F_LJUST; continue;
740 case '+': flags |= F_SIGN; continue;
741 case ' ': flags |= F_BLANK; continue;
742 case '#': flags |= F_ALT; continue;
743 case '0': flags |= F_ZERO; continue;
744 }
745 break;
746 }
747
748 /* Parse width. Example: "%10s" => width=10 */
749 if (c == '*') {
750 v = getnextarg(args, arglen, &argidx);
751 if (v == NULL)
752 goto error;
753 if (!PyLong_Check(v)) {
754 PyErr_SetString(PyExc_TypeError,
755 "* wants int");
756 goto error;
757 }
758 width = PyLong_AsSsize_t(v);
759 if (width == -1 && PyErr_Occurred())
760 goto error;
/* A negative "*" width means left-justify with the absolute value. */
761 if (width < 0) {
762 flags |= F_LJUST;
763 width = -width;
764 }
765 if (--fmtcnt >= 0)
766 c = *fmt++;
767 }
768 else if (c >= 0 && isdigit(c)) {
769 width = c - '0';
770 while (--fmtcnt >= 0) {
771 c = Py_CHARMASK(*fmt++);
772 if (!isdigit(c))
773 break;
774 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
775 PyErr_SetString(
776 PyExc_ValueError,
777 "width too big");
778 goto error;
779 }
780 width = width*10 + (c - '0');
781 }
782 }
783
784 /* Parse precision. Example: "%.3f" => prec=3 */
785 if (c == '.') {
786 prec = 0;
787 if (--fmtcnt >= 0)
788 c = *fmt++;
789 if (c == '*') {
790 v = getnextarg(args, arglen, &argidx);
791 if (v == NULL)
792 goto error;
793 if (!PyLong_Check(v)) {
794 PyErr_SetString(
795 PyExc_TypeError,
796 "* wants int");
797 goto error;
798 }
799 prec = _PyLong_AsInt(v);
800 if (prec == -1 && PyErr_Occurred())
801 goto error;
802 if (prec < 0)
803 prec = 0;
804 if (--fmtcnt >= 0)
805 c = *fmt++;
806 }
807 else if (c >= 0 && isdigit(c)) {
808 prec = c - '0';
809 while (--fmtcnt >= 0) {
810 c = Py_CHARMASK(*fmt++);
811 if (!isdigit(c))
812 break;
813 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
814 PyErr_SetString(
815 PyExc_ValueError,
816 "prec too big");
817 goto error;
818 }
819 prec = prec*10 + (c - '0');
820 }
821 }
822 } /* prec */
/* The C length modifiers h, l and L are accepted and skipped. */
823 if (fmtcnt >= 0) {
824 if (c == 'h' || c == 'l' || c == 'L') {
825 if (--fmtcnt >= 0)
826 c = *fmt++;
827 }
828 }
829 if (fmtcnt < 0) {
830 PyErr_SetString(PyExc_ValueError,
831 "incomplete format");
832 goto error;
833 }
834 v = getnextarg(args, arglen, &argidx);
835 if (v == NULL)
836 goto error;
837
838 if (fmtcnt == 0) {
839 /* last write: disable writer overallocation */
840 writer.overallocate = 0;
841 }
842
/* Each case below either writes its output directly and `continue`s (fast
   paths), or sets pbuf/len (and temp, the object owning that data) for the
   shared padding code after the switch. */
843 sign = 0;
844 fill = ' ';
845 switch (c) {
846 case 'r':
847 // %r is only for 2/3 code; 3 only code should use %a
848 case 'a':
849 temp = PyObject_ASCII(v);
850 if (temp == NULL)
851 goto error;
852 assert(PyUnicode_IS_ASCII(temp));
853 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
854 len = PyUnicode_GET_LENGTH(temp);
855 if (prec >= 0 && len > prec)
856 len = prec;
857 break;
858
859 case 's':
860 // %s is only for 2/3 code; 3 only code should use %b
861 case 'b':
862 temp = format_obj(v, &pbuf, &len);
863 if (temp == NULL)
864 goto error;
865 if (prec >= 0 && len > prec)
866 len = prec;
867 break;
868
869 case 'i':
870 case 'd':
871 case 'u':
872 case 'o':
873 case 'x':
874 case 'X':
875 if (PyLong_CheckExact(v)
876 && width == -1 && prec == -1
877 && !(flags & (F_SIGN | F_BLANK))
878 && c != 'X')
879 {
880 /* Fast path */
881 int alternate = flags & F_ALT;
882 int base;
883
884 switch(c)
885 {
886 default:
887 Py_UNREACHABLE();
888 case 'd':
889 case 'i':
890 case 'u':
891 base = 10;
892 break;
893 case 'o':
894 base = 8;
895 break;
896 case 'x':
897 case 'X':
898 base = 16;
899 break;
900 }
901
902 /* Fast path */
903 writer.min_size -= 2; /* size preallocated for "%d" */
904 res = _PyLong_FormatBytesWriter(&writer, res,
905 v, base, alternate);
906 if (res == NULL)
907 goto error;
908 continue;
909 }
910
911 temp = formatlong(v, flags, prec, c);
912 if (!temp)
913 goto error;
914 assert(PyUnicode_IS_ASCII(temp));
915 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
916 len = PyUnicode_GET_LENGTH(temp);
917 sign = 1;
918 if (flags & F_ZERO)
919 fill = '0';
920 break;
921
922 case 'e':
923 case 'E':
924 case 'f':
925 case 'F':
926 case 'g':
927 case 'G':
928 if (width == -1 && prec == -1
929 && !(flags & (F_SIGN | F_BLANK)))
930 {
931 /* Fast path */
932 writer.min_size -= 2; /* size preallocated for "%f" */
933 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
934 if (res == NULL)
935 goto error;
936 continue;
937 }
938
939 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
940 goto error;
941 pbuf = PyBytes_AS_STRING(temp);
942 len = PyBytes_GET_SIZE(temp);
943 sign = 1;
944 if (flags & F_ZERO)
945 fill = '0';
946 break;
947
948 case 'c':
949 pbuf = &onechar;
950 len = byte_converter(v, &onechar);
951 if (!len)
952 goto error;
953 if (width == -1) {
954 /* Fast path */
955 *res++ = onechar;
956 continue;
957 }
958 break;
959
960 default:
961 PyErr_Format(PyExc_ValueError,
962 "unsupported format character '%c' (0x%x) "
963 "at index %zd",
964 c, c,
965 (Py_ssize_t)(fmt - 1 - format));
966 goto error;
967 }
968
/* For numeric conversions, turn `sign` from a flag into the sign character
   to emit (0 for none), consuming a leading '-' or '+' from the text. */
969 if (sign) {
970 if (*pbuf == '-' || *pbuf == '+') {
971 sign = *pbuf++;
972 len--;
973 }
974 else if (flags & F_SIGN)
975 sign = '+';
976 else if (flags & F_BLANK)
977 sign = ' ';
978 else
979 sign = 0;
980 }
981 if (width < len)
982 width = len;
983
/* alloc is the exact number of bytes this conversion will write. */
984 alloc = width;
985 if (sign != 0 && len == width)
986 alloc++;
987 /* 2: size preallocated for %s */
988 if (alloc > 2) {
989 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
990 if (res == NULL)
991 goto error;
992 }
993 #ifndef NDEBUG
994 char *before = res;
995 #endif
996
997 /* Write the sign if needed */
998 if (sign) {
999 if (fill != ' ')
1000 *res++ = sign;
1001 if (width > len)
1002 width--;
1003 }
1004
1005 /* Write the numeric prefix for "x", "X" and "o" formats
1006 if the alternate form is used.
1007 For example, write "0x" for the "%#x" format. */
1008 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1009 assert(pbuf[0] == '0');
1010 assert(pbuf[1] == c);
1011 if (fill != ' ') {
1012 *res++ = *pbuf++;
1013 *res++ = *pbuf++;
1014 }
1015 width -= 2;
1016 if (width < 0)
1017 width = 0;
1018 len -= 2;
1019 }
1020
1021 /* Pad left with the fill character if needed */
1022 if (width > len && !(flags & F_LJUST)) {
1023 memset(res, fill, width - len);
1024 res += (width - len);
1025 width = len;
1026 }
1027
1028 /* If padding with spaces: write sign if needed and/or numeric
1029 prefix if the alternate form is used */
1030 if (fill == ' ') {
1031 if (sign)
1032 *res++ = sign;
1033 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1034 assert(pbuf[0] == '0');
1035 assert(pbuf[1] == c);
1036 *res++ = *pbuf++;
1037 *res++ = *pbuf++;
1038 }
1039 }
1040
1041 /* Copy bytes */
1042 memcpy(res, pbuf, len);
1043 res += len;
1044
1045 /* Pad right with the fill character if needed */
1046 if (width > len) {
1047 memset(res, ' ', width - len);
1048 res += (width - len);
1049 }
1050
1051 if (dict && (argidx < arglen)) {
1052 PyErr_SetString(PyExc_TypeError,
1053 "not all arguments converted during bytes formatting");
1054 Py_XDECREF(temp);
1055 goto error;
1056 }
1057 Py_XDECREF(temp);
1058
1059 #ifndef NDEBUG
1060 /* check that we computed the exact size for this write */
1061 assert((res - before) == alloc);
1062 #endif
1063 } /* '%' */
1064
1065 /* If overallocation was disabled, ensure that it was the last
1066 write. Otherwise, we missed an optimization */
1067 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
1068 } /* until end */
1069
/* Leftover positional arguments are an error (not checked for mappings). */
1070 if (argidx < arglen && !dict) {
1071 PyErr_SetString(PyExc_TypeError,
1072 "not all arguments converted during bytes formatting");
1073 goto error;
1074 }
1075
1076 if (args_owned) {
1077 Py_DECREF(args);
1078 }
1079 return _PyBytesWriter_Finish(&writer, res);
1080
/* Failure path: release the writer buffer and any value fetched from the
   mapping. */
1081 error:
1082 _PyBytesWriter_Dealloc(&writer);
1083 if (args_owned) {
1084 Py_DECREF(args);
1085 }
1086 return NULL;
1087 }
1088
1089 /* Unescape a backslash-escaped string. */
_PyBytes_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,const char ** first_invalid_escape)1090 PyObject *_PyBytes_DecodeEscape(const char *s,
1091 Py_ssize_t len,
1092 const char *errors,
1093 const char **first_invalid_escape)
1094 {
1095 int c;
1096 char *p;
1097 const char *end;
1098 _PyBytesWriter writer;
1099
1100 _PyBytesWriter_Init(&writer);
1101
1102 p = _PyBytesWriter_Alloc(&writer, len);
1103 if (p == NULL)
1104 return NULL;
1105 writer.overallocate = 1;
1106
1107 *first_invalid_escape = NULL;
1108
1109 end = s + len;
1110 while (s < end) {
1111 if (*s != '\\') {
1112 *p++ = *s++;
1113 continue;
1114 }
1115
1116 s++;
1117 if (s == end) {
1118 PyErr_SetString(PyExc_ValueError,
1119 "Trailing \\ in string");
1120 goto failed;
1121 }
1122
1123 switch (*s++) {
1124 /* XXX This assumes ASCII! */
1125 case '\n': break;
1126 case '\\': *p++ = '\\'; break;
1127 case '\'': *p++ = '\''; break;
1128 case '\"': *p++ = '\"'; break;
1129 case 'b': *p++ = '\b'; break;
1130 case 'f': *p++ = '\014'; break; /* FF */
1131 case 't': *p++ = '\t'; break;
1132 case 'n': *p++ = '\n'; break;
1133 case 'r': *p++ = '\r'; break;
1134 case 'v': *p++ = '\013'; break; /* VT */
1135 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1136 case '0': case '1': case '2': case '3':
1137 case '4': case '5': case '6': case '7':
1138 c = s[-1] - '0';
1139 if (s < end && '0' <= *s && *s <= '7') {
1140 c = (c<<3) + *s++ - '0';
1141 if (s < end && '0' <= *s && *s <= '7')
1142 c = (c<<3) + *s++ - '0';
1143 }
1144 *p++ = c;
1145 break;
1146 case 'x':
1147 if (s+1 < end) {
1148 int digit1, digit2;
1149 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1150 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1151 if (digit1 < 16 && digit2 < 16) {
1152 *p++ = (unsigned char)((digit1 << 4) + digit2);
1153 s += 2;
1154 break;
1155 }
1156 }
1157 /* invalid hexadecimal digits */
1158
1159 if (!errors || strcmp(errors, "strict") == 0) {
1160 PyErr_Format(PyExc_ValueError,
1161 "invalid \\x escape at position %zd",
1162 s - 2 - (end - len));
1163 goto failed;
1164 }
1165 if (strcmp(errors, "replace") == 0) {
1166 *p++ = '?';
1167 } else if (strcmp(errors, "ignore") == 0)
1168 /* do nothing */;
1169 else {
1170 PyErr_Format(PyExc_ValueError,
1171 "decoding error; unknown "
1172 "error handling code: %.400s",
1173 errors);
1174 goto failed;
1175 }
1176 /* skip \x */
1177 if (s < end && Py_ISXDIGIT(s[0]))
1178 s++; /* and a hexdigit */
1179 break;
1180
1181 default:
1182 if (*first_invalid_escape == NULL) {
1183 *first_invalid_escape = s-1; /* Back up one char, since we've
1184 already incremented s. */
1185 }
1186 *p++ = '\\';
1187 s--;
1188 }
1189 }
1190
1191 return _PyBytesWriter_Finish(&writer, p);
1192
1193 failed:
1194 _PyBytesWriter_Dealloc(&writer);
1195 return NULL;
1196 }
1197
PyBytes_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,Py_ssize_t Py_UNUSED (unicode),const char * Py_UNUSED (recode_encoding))1198 PyObject *PyBytes_DecodeEscape(const char *s,
1199 Py_ssize_t len,
1200 const char *errors,
1201 Py_ssize_t Py_UNUSED(unicode),
1202 const char *Py_UNUSED(recode_encoding))
1203 {
1204 const char* first_invalid_escape;
1205 PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
1206 &first_invalid_escape);
1207 if (result == NULL)
1208 return NULL;
1209 if (first_invalid_escape != NULL) {
1210 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1211 "invalid escape sequence '\\%c'",
1212 (unsigned char)*first_invalid_escape) < 0) {
1213 Py_DECREF(result);
1214 return NULL;
1215 }
1216 }
1217 return result;
1218
1219 }
1220 /* -------------------------------------------------------------------- */
1221 /* object api */
1222
1223 Py_ssize_t
PyBytes_Size(PyObject * op)1224 PyBytes_Size(PyObject *op)
1225 {
1226 if (!PyBytes_Check(op)) {
1227 PyErr_Format(PyExc_TypeError,
1228 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1229 return -1;
1230 }
1231 return Py_SIZE(op);
1232 }
1233
1234 char *
PyBytes_AsString(PyObject * op)1235 PyBytes_AsString(PyObject *op)
1236 {
1237 if (!PyBytes_Check(op)) {
1238 PyErr_Format(PyExc_TypeError,
1239 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1240 return NULL;
1241 }
1242 return ((PyBytesObject *)op)->ob_sval;
1243 }
1244
1245 int
PyBytes_AsStringAndSize(PyObject * obj,char ** s,Py_ssize_t * len)1246 PyBytes_AsStringAndSize(PyObject *obj,
1247 char **s,
1248 Py_ssize_t *len)
1249 {
1250 if (s == NULL) {
1251 PyErr_BadInternalCall();
1252 return -1;
1253 }
1254
1255 if (!PyBytes_Check(obj)) {
1256 PyErr_Format(PyExc_TypeError,
1257 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1258 return -1;
1259 }
1260
1261 *s = PyBytes_AS_STRING(obj);
1262 if (len != NULL)
1263 *len = PyBytes_GET_SIZE(obj);
1264 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1265 PyErr_SetString(PyExc_ValueError,
1266 "embedded null byte");
1267 return -1;
1268 }
1269 return 0;
1270 }
1271
1272 /* -------------------------------------------------------------------- */
1273 /* Methods */
1274
1275 #define STRINGLIB_GET_EMPTY() bytes_get_empty()
1276
1277 #include "stringlib/stringdefs.h"
1278
1279 #include "stringlib/fastsearch.h"
1280 #include "stringlib/count.h"
1281 #include "stringlib/find.h"
1282 #include "stringlib/join.h"
1283 #include "stringlib/partition.h"
1284 #include "stringlib/split.h"
1285 #include "stringlib/ctype.h"
1286
1287 #include "stringlib/transmogrify.h"
1288
1289 #undef STRINGLIB_GET_EMPTY
1290
1291 PyObject *
PyBytes_Repr(PyObject * obj,int smartquotes)1292 PyBytes_Repr(PyObject *obj, int smartquotes)
1293 {
1294 PyBytesObject* op = (PyBytesObject*) obj;
1295 Py_ssize_t i, length = Py_SIZE(op);
1296 Py_ssize_t newsize, squotes, dquotes;
1297 PyObject *v;
1298 unsigned char quote;
1299 const unsigned char *s;
1300 Py_UCS1 *p;
1301
1302 /* Compute size of output string */
1303 squotes = dquotes = 0;
1304 newsize = 3; /* b'' */
1305 s = (const unsigned char*)op->ob_sval;
1306 for (i = 0; i < length; i++) {
1307 Py_ssize_t incr = 1;
1308 switch(s[i]) {
1309 case '\'': squotes++; break;
1310 case '"': dquotes++; break;
1311 case '\\': case '\t': case '\n': case '\r':
1312 incr = 2; break; /* \C */
1313 default:
1314 if (s[i] < ' ' || s[i] >= 0x7f)
1315 incr = 4; /* \xHH */
1316 }
1317 if (newsize > PY_SSIZE_T_MAX - incr)
1318 goto overflow;
1319 newsize += incr;
1320 }
1321 quote = '\'';
1322 if (smartquotes && squotes && !dquotes)
1323 quote = '"';
1324 if (squotes && quote == '\'') {
1325 if (newsize > PY_SSIZE_T_MAX - squotes)
1326 goto overflow;
1327 newsize += squotes;
1328 }
1329
1330 v = PyUnicode_New(newsize, 127);
1331 if (v == NULL) {
1332 return NULL;
1333 }
1334 p = PyUnicode_1BYTE_DATA(v);
1335
1336 *p++ = 'b', *p++ = quote;
1337 for (i = 0; i < length; i++) {
1338 unsigned char c = op->ob_sval[i];
1339 if (c == quote || c == '\\')
1340 *p++ = '\\', *p++ = c;
1341 else if (c == '\t')
1342 *p++ = '\\', *p++ = 't';
1343 else if (c == '\n')
1344 *p++ = '\\', *p++ = 'n';
1345 else if (c == '\r')
1346 *p++ = '\\', *p++ = 'r';
1347 else if (c < ' ' || c >= 0x7f) {
1348 *p++ = '\\';
1349 *p++ = 'x';
1350 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1351 *p++ = Py_hexdigits[c & 0xf];
1352 }
1353 else
1354 *p++ = c;
1355 }
1356 *p++ = quote;
1357 assert(_PyUnicode_CheckConsistency(v, 1));
1358 return v;
1359
1360 overflow:
1361 PyErr_SetString(PyExc_OverflowError,
1362 "bytes object is too large to make repr");
1363 return NULL;
1364 }
1365
1366 static PyObject *
bytes_repr(PyObject * op)1367 bytes_repr(PyObject *op)
1368 {
1369 return PyBytes_Repr(op, 1);
1370 }
1371
1372 static PyObject *
bytes_str(PyObject * op)1373 bytes_str(PyObject *op)
1374 {
1375 if (_Py_GetConfig()->bytes_warning) {
1376 if (PyErr_WarnEx(PyExc_BytesWarning,
1377 "str() on a bytes instance", 1)) {
1378 return NULL;
1379 }
1380 }
1381 return bytes_repr(op);
1382 }
1383
1384 static Py_ssize_t
bytes_length(PyBytesObject * a)1385 bytes_length(PyBytesObject *a)
1386 {
1387 return Py_SIZE(a);
1388 }
1389
1390 /* This is also used by PyBytes_Concat() */
1391 static PyObject *
bytes_concat(PyObject * a,PyObject * b)1392 bytes_concat(PyObject *a, PyObject *b)
1393 {
1394 Py_buffer va, vb;
1395 PyObject *result = NULL;
1396
1397 va.len = -1;
1398 vb.len = -1;
1399 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1400 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1401 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1402 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
1403 goto done;
1404 }
1405
1406 /* Optimize end cases */
1407 if (va.len == 0 && PyBytes_CheckExact(b)) {
1408 result = b;
1409 Py_INCREF(result);
1410 goto done;
1411 }
1412 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1413 result = a;
1414 Py_INCREF(result);
1415 goto done;
1416 }
1417
1418 if (va.len > PY_SSIZE_T_MAX - vb.len) {
1419 PyErr_NoMemory();
1420 goto done;
1421 }
1422
1423 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1424 if (result != NULL) {
1425 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1426 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1427 }
1428
1429 done:
1430 if (va.len != -1)
1431 PyBuffer_Release(&va);
1432 if (vb.len != -1)
1433 PyBuffer_Release(&vb);
1434 return result;
1435 }
1436
1437 static PyObject *
bytes_repeat(PyBytesObject * a,Py_ssize_t n)1438 bytes_repeat(PyBytesObject *a, Py_ssize_t n)
1439 {
1440 Py_ssize_t i;
1441 Py_ssize_t j;
1442 Py_ssize_t size;
1443 PyBytesObject *op;
1444 size_t nbytes;
1445 if (n < 0)
1446 n = 0;
1447 /* watch out for overflows: the size can overflow int,
1448 * and the # of bytes needed can overflow size_t
1449 */
1450 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1451 PyErr_SetString(PyExc_OverflowError,
1452 "repeated bytes are too long");
1453 return NULL;
1454 }
1455 size = Py_SIZE(a) * n;
1456 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1457 Py_INCREF(a);
1458 return (PyObject *)a;
1459 }
1460 nbytes = (size_t)size;
1461 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1462 PyErr_SetString(PyExc_OverflowError,
1463 "repeated bytes are too long");
1464 return NULL;
1465 }
1466 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + nbytes);
1467 if (op == NULL) {
1468 return PyErr_NoMemory();
1469 }
1470 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
1471 op->ob_shash = -1;
1472 op->ob_sval[size] = '\0';
1473 if (Py_SIZE(a) == 1 && n > 0) {
1474 memset(op->ob_sval, a->ob_sval[0] , n);
1475 return (PyObject *) op;
1476 }
1477 i = 0;
1478 if (i < size) {
1479 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
1480 i = Py_SIZE(a);
1481 }
1482 while (i < size) {
1483 j = (i <= size-i) ? i : size-i;
1484 memcpy(op->ob_sval+i, op->ob_sval, j);
1485 i += j;
1486 }
1487 return (PyObject *) op;
1488 }
1489
1490 static int
bytes_contains(PyObject * self,PyObject * arg)1491 bytes_contains(PyObject *self, PyObject *arg)
1492 {
1493 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1494 }
1495
1496 static PyObject *
bytes_item(PyBytesObject * a,Py_ssize_t i)1497 bytes_item(PyBytesObject *a, Py_ssize_t i)
1498 {
1499 if (i < 0 || i >= Py_SIZE(a)) {
1500 PyErr_SetString(PyExc_IndexError, "index out of range");
1501 return NULL;
1502 }
1503 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
1504 }
1505
1506 static int
bytes_compare_eq(PyBytesObject * a,PyBytesObject * b)1507 bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1508 {
1509 int cmp;
1510 Py_ssize_t len;
1511
1512 len = Py_SIZE(a);
1513 if (Py_SIZE(b) != len)
1514 return 0;
1515
1516 if (a->ob_sval[0] != b->ob_sval[0])
1517 return 0;
1518
1519 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1520 return (cmp == 0);
1521 }
1522
1523 static PyObject*
bytes_richcompare(PyBytesObject * a,PyBytesObject * b,int op)1524 bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
1525 {
1526 int c;
1527 Py_ssize_t len_a, len_b;
1528 Py_ssize_t min_len;
1529
1530 /* Make sure both arguments are strings. */
1531 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1532 if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
1533 if (PyUnicode_Check(a) || PyUnicode_Check(b)) {
1534 if (PyErr_WarnEx(PyExc_BytesWarning,
1535 "Comparison between bytes and string", 1))
1536 return NULL;
1537 }
1538 if (PyLong_Check(a) || PyLong_Check(b)) {
1539 if (PyErr_WarnEx(PyExc_BytesWarning,
1540 "Comparison between bytes and int", 1))
1541 return NULL;
1542 }
1543 }
1544 Py_RETURN_NOTIMPLEMENTED;
1545 }
1546 else if (a == b) {
1547 switch (op) {
1548 case Py_EQ:
1549 case Py_LE:
1550 case Py_GE:
1551 /* a byte string is equal to itself */
1552 Py_RETURN_TRUE;
1553 case Py_NE:
1554 case Py_LT:
1555 case Py_GT:
1556 Py_RETURN_FALSE;
1557 default:
1558 PyErr_BadArgument();
1559 return NULL;
1560 }
1561 }
1562 else if (op == Py_EQ || op == Py_NE) {
1563 int eq = bytes_compare_eq(a, b);
1564 eq ^= (op == Py_NE);
1565 return PyBool_FromLong(eq);
1566 }
1567 else {
1568 len_a = Py_SIZE(a);
1569 len_b = Py_SIZE(b);
1570 min_len = Py_MIN(len_a, len_b);
1571 if (min_len > 0) {
1572 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1573 if (c == 0)
1574 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1575 }
1576 else
1577 c = 0;
1578 if (c != 0)
1579 Py_RETURN_RICHCOMPARE(c, 0, op);
1580 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
1581 }
1582 }
1583
1584 static Py_hash_t
bytes_hash(PyBytesObject * a)1585 bytes_hash(PyBytesObject *a)
1586 {
1587 if (a->ob_shash == -1) {
1588 /* Can't fail */
1589 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
1590 }
1591 return a->ob_shash;
1592 }
1593
1594 static PyObject*
bytes_subscript(PyBytesObject * self,PyObject * item)1595 bytes_subscript(PyBytesObject* self, PyObject* item)
1596 {
1597 if (_PyIndex_Check(item)) {
1598 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1599 if (i == -1 && PyErr_Occurred())
1600 return NULL;
1601 if (i < 0)
1602 i += PyBytes_GET_SIZE(self);
1603 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1604 PyErr_SetString(PyExc_IndexError,
1605 "index out of range");
1606 return NULL;
1607 }
1608 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1609 }
1610 else if (PySlice_Check(item)) {
1611 Py_ssize_t start, stop, step, slicelength, i;
1612 size_t cur;
1613 const char* source_buf;
1614 char* result_buf;
1615 PyObject* result;
1616
1617 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1618 return NULL;
1619 }
1620 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1621 &stop, step);
1622
1623 if (slicelength <= 0) {
1624 return PyBytes_FromStringAndSize("", 0);
1625 }
1626 else if (start == 0 && step == 1 &&
1627 slicelength == PyBytes_GET_SIZE(self) &&
1628 PyBytes_CheckExact(self)) {
1629 Py_INCREF(self);
1630 return (PyObject *)self;
1631 }
1632 else if (step == 1) {
1633 return PyBytes_FromStringAndSize(
1634 PyBytes_AS_STRING(self) + start,
1635 slicelength);
1636 }
1637 else {
1638 source_buf = PyBytes_AS_STRING(self);
1639 result = PyBytes_FromStringAndSize(NULL, slicelength);
1640 if (result == NULL)
1641 return NULL;
1642
1643 result_buf = PyBytes_AS_STRING(result);
1644 for (cur = start, i = 0; i < slicelength;
1645 cur += step, i++) {
1646 result_buf[i] = source_buf[cur];
1647 }
1648
1649 return result;
1650 }
1651 }
1652 else {
1653 PyErr_Format(PyExc_TypeError,
1654 "byte indices must be integers or slices, not %.200s",
1655 Py_TYPE(item)->tp_name);
1656 return NULL;
1657 }
1658 }
1659
1660 static int
bytes_buffer_getbuffer(PyBytesObject * self,Py_buffer * view,int flags)1661 bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
1662 {
1663 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1664 1, flags);
1665 }
1666
1667 static PySequenceMethods bytes_as_sequence = {
1668 (lenfunc)bytes_length, /*sq_length*/
1669 (binaryfunc)bytes_concat, /*sq_concat*/
1670 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1671 (ssizeargfunc)bytes_item, /*sq_item*/
1672 0, /*sq_slice*/
1673 0, /*sq_ass_item*/
1674 0, /*sq_ass_slice*/
1675 (objobjproc)bytes_contains /*sq_contains*/
1676 };
1677
1678 static PyMappingMethods bytes_as_mapping = {
1679 (lenfunc)bytes_length,
1680 (binaryfunc)bytes_subscript,
1681 0,
1682 };
1683
1684 static PyBufferProcs bytes_as_buffer = {
1685 (getbufferproc)bytes_buffer_getbuffer,
1686 NULL,
1687 };
1688
1689
/* Values for the `striptype` argument of do_xstrip(), do_strip() and
   do_argstrip().  Those helpers trim the leading end unless the mode is
   RIGHTSTRIP and the trailing end unless the mode is LEFTSTRIP, so
   BOTHSTRIP trims both. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1693
1694 /*[clinic input]
1695 bytes.split
1696
1697 sep: object = None
1698 The delimiter according which to split the bytes.
1699 None (the default value) means split on ASCII whitespace characters
1700 (space, tab, return, newline, formfeed, vertical tab).
1701 maxsplit: Py_ssize_t = -1
1702 Maximum number of splits to do.
1703 -1 (the default value) means no limit.
1704
1705 Return a list of the sections in the bytes, using sep as the delimiter.
1706 [clinic start generated code]*/
1707
1708 static PyObject *
bytes_split_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1709 bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1710 /*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1711 {
1712 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1713 const char *s = PyBytes_AS_STRING(self), *sub;
1714 Py_buffer vsub;
1715 PyObject *list;
1716
1717 if (maxsplit < 0)
1718 maxsplit = PY_SSIZE_T_MAX;
1719 if (sep == Py_None)
1720 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1721 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1722 return NULL;
1723 sub = vsub.buf;
1724 n = vsub.len;
1725
1726 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1727 PyBuffer_Release(&vsub);
1728 return list;
1729 }
1730
1731 /*[clinic input]
1732 bytes.partition
1733
1734 sep: Py_buffer
1735 /
1736
1737 Partition the bytes into three parts using the given separator.
1738
1739 This will search for the separator sep in the bytes. If the separator is found,
1740 returns a 3-tuple containing the part before the separator, the separator
1741 itself, and the part after it.
1742
1743 If the separator is not found, returns a 3-tuple containing the original bytes
1744 object and two empty bytes objects.
1745 [clinic start generated code]*/
1746
1747 static PyObject *
bytes_partition_impl(PyBytesObject * self,Py_buffer * sep)1748 bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1749 /*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
1750 {
1751 return stringlib_partition(
1752 (PyObject*) self,
1753 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1754 sep->obj, (const char *)sep->buf, sep->len
1755 );
1756 }
1757
1758 /*[clinic input]
1759 bytes.rpartition
1760
1761 sep: Py_buffer
1762 /
1763
1764 Partition the bytes into three parts using the given separator.
1765
1766 This will search for the separator sep in the bytes, starting at the end. If
1767 the separator is found, returns a 3-tuple containing the part before the
1768 separator, the separator itself, and the part after it.
1769
1770 If the separator is not found, returns a 3-tuple containing two empty bytes
1771 objects and the original bytes object.
1772 [clinic start generated code]*/
1773
1774 static PyObject *
bytes_rpartition_impl(PyBytesObject * self,Py_buffer * sep)1775 bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1776 /*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
1777 {
1778 return stringlib_rpartition(
1779 (PyObject*) self,
1780 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1781 sep->obj, (const char *)sep->buf, sep->len
1782 );
1783 }
1784
1785 /*[clinic input]
1786 bytes.rsplit = bytes.split
1787
1788 Return a list of the sections in the bytes, using sep as the delimiter.
1789
1790 Splitting is done starting at the end of the bytes and working to the front.
1791 [clinic start generated code]*/
1792
1793 static PyObject *
bytes_rsplit_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1794 bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1795 /*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
1796 {
1797 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1798 const char *s = PyBytes_AS_STRING(self), *sub;
1799 Py_buffer vsub;
1800 PyObject *list;
1801
1802 if (maxsplit < 0)
1803 maxsplit = PY_SSIZE_T_MAX;
1804 if (sep == Py_None)
1805 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1806 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1807 return NULL;
1808 sub = vsub.buf;
1809 n = vsub.len;
1810
1811 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1812 PyBuffer_Release(&vsub);
1813 return list;
1814 }
1815
1816
1817 /*[clinic input]
1818 bytes.join
1819
1820 iterable_of_bytes: object
1821 /
1822
1823 Concatenate any number of bytes objects.
1824
1825 The bytes whose method is called is inserted in between each pair.
1826
1827 The result is returned as a new bytes object.
1828
1829 Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1830 [clinic start generated code]*/
1831
1832 static PyObject *
bytes_join(PyBytesObject * self,PyObject * iterable_of_bytes)1833 bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1834 /*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
1835 {
1836 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1837 }
1838
1839 PyObject *
_PyBytes_Join(PyObject * sep,PyObject * x)1840 _PyBytes_Join(PyObject *sep, PyObject *x)
1841 {
1842 assert(sep != NULL && PyBytes_Check(sep));
1843 assert(x != NULL);
1844 return bytes_join((PyBytesObject*)sep, x);
1845 }
1846
1847 static PyObject *
bytes_find(PyBytesObject * self,PyObject * args)1848 bytes_find(PyBytesObject *self, PyObject *args)
1849 {
1850 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1851 }
1852
1853 static PyObject *
bytes_index(PyBytesObject * self,PyObject * args)1854 bytes_index(PyBytesObject *self, PyObject *args)
1855 {
1856 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1857 }
1858
1859
1860 static PyObject *
bytes_rfind(PyBytesObject * self,PyObject * args)1861 bytes_rfind(PyBytesObject *self, PyObject *args)
1862 {
1863 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1864 }
1865
1866
1867 static PyObject *
bytes_rindex(PyBytesObject * self,PyObject * args)1868 bytes_rindex(PyBytesObject *self, PyObject *args)
1869 {
1870 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1871 }
1872
1873
1874 Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject * self,int striptype,PyObject * sepobj)1875 do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
1876 {
1877 Py_buffer vsep;
1878 const char *s = PyBytes_AS_STRING(self);
1879 Py_ssize_t len = PyBytes_GET_SIZE(self);
1880 char *sep;
1881 Py_ssize_t seplen;
1882 Py_ssize_t i, j;
1883
1884 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
1885 return NULL;
1886 sep = vsep.buf;
1887 seplen = vsep.len;
1888
1889 i = 0;
1890 if (striptype != RIGHTSTRIP) {
1891 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1892 i++;
1893 }
1894 }
1895
1896 j = len;
1897 if (striptype != LEFTSTRIP) {
1898 do {
1899 j--;
1900 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1901 j++;
1902 }
1903
1904 PyBuffer_Release(&vsep);
1905
1906 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1907 Py_INCREF(self);
1908 return (PyObject*)self;
1909 }
1910 else
1911 return PyBytes_FromStringAndSize(s+i, j-i);
1912 }
1913
1914
1915 Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject * self,int striptype)1916 do_strip(PyBytesObject *self, int striptype)
1917 {
1918 const char *s = PyBytes_AS_STRING(self);
1919 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1920
1921 i = 0;
1922 if (striptype != RIGHTSTRIP) {
1923 while (i < len && Py_ISSPACE(s[i])) {
1924 i++;
1925 }
1926 }
1927
1928 j = len;
1929 if (striptype != LEFTSTRIP) {
1930 do {
1931 j--;
1932 } while (j >= i && Py_ISSPACE(s[j]));
1933 j++;
1934 }
1935
1936 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1937 Py_INCREF(self);
1938 return (PyObject*)self;
1939 }
1940 else
1941 return PyBytes_FromStringAndSize(s+i, j-i);
1942 }
1943
1944
1945 Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject * self,int striptype,PyObject * bytes)1946 do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
1947 {
1948 if (bytes != Py_None) {
1949 return do_xstrip(self, striptype, bytes);
1950 }
1951 return do_strip(self, striptype);
1952 }
1953
1954 /*[clinic input]
1955 bytes.strip
1956
1957 bytes: object = None
1958 /
1959
1960 Strip leading and trailing bytes contained in the argument.
1961
1962 If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1963 [clinic start generated code]*/
1964
1965 static PyObject *
bytes_strip_impl(PyBytesObject * self,PyObject * bytes)1966 bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
1967 /*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
1968 {
1969 return do_argstrip(self, BOTHSTRIP, bytes);
1970 }
1971
1972 /*[clinic input]
1973 bytes.lstrip
1974
1975 bytes: object = None
1976 /
1977
1978 Strip leading bytes contained in the argument.
1979
1980 If the argument is omitted or None, strip leading ASCII whitespace.
1981 [clinic start generated code]*/
1982
1983 static PyObject *
bytes_lstrip_impl(PyBytesObject * self,PyObject * bytes)1984 bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
1985 /*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
1986 {
1987 return do_argstrip(self, LEFTSTRIP, bytes);
1988 }
1989
1990 /*[clinic input]
1991 bytes.rstrip
1992
1993 bytes: object = None
1994 /
1995
1996 Strip trailing bytes contained in the argument.
1997
1998 If the argument is omitted or None, strip trailing ASCII whitespace.
1999 [clinic start generated code]*/
2000
2001 static PyObject *
bytes_rstrip_impl(PyBytesObject * self,PyObject * bytes)2002 bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2003 /*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2004 {
2005 return do_argstrip(self, RIGHTSTRIP, bytes);
2006 }
2007
2008
2009 static PyObject *
bytes_count(PyBytesObject * self,PyObject * args)2010 bytes_count(PyBytesObject *self, PyObject *args)
2011 {
2012 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2013 }
2014
2015
2016 /*[clinic input]
2017 bytes.translate
2018
2019 table: object
2020 Translation table, which must be a bytes object of length 256.
2021 /
2022 delete as deletechars: object(c_default="NULL") = b''
2023
2024 Return a copy with each character mapped by the given translation table.
2025
2026 All characters occurring in the optional argument delete are removed.
2027 The remaining characters are mapped through the given translation table.
2028 [clinic start generated code]*/
2029
static PyObject *
bytes_translate_impl(PyBytesObject *self, PyObject *table,
                     PyObject *deletechars)
/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
{
    /* Overview:
         1. Resolve `table` to a 256-entry byte array (or NULL for None,
            which is later treated as the identity mapping).
         2. Resolve `deletechars` (may be NULL) to a byte array + length.
         3. Allocate a result as large as the input.
         4. Fast path when there is a table and nothing to delete;
            otherwise build an int lookup table where -1 marks "delete"
            and run the general loop, shrinking the result at the end.
       When nothing changed and self is an exact bytes object, self is
       returned instead of the copy. */
    const char *input;
    char *output;
    /* Both views are released on exit paths even when they were never
       acquired; NOTE(review): this presumably relies on PyBuffer_Release()
       being a no-op for a view initialised like this -- confirm against
       the buffer API documentation. */
    Py_buffer table_view = {NULL, NULL};
    Py_buffer del_table_view = {NULL, NULL};
    const char *table_chars;
    Py_ssize_t i, c, changed = 0;
    PyObject *input_obj = (PyObject*)self;
    const char *output_start, *del_table_chars=NULL;
    Py_ssize_t inlen, tablen, dellen = 0;
    PyObject *result;
    int trans_table[256];

    /* Step 1: locate the translation table. */
    if (PyBytes_Check(table)) {
        table_chars = PyBytes_AS_STRING(table);
        tablen = PyBytes_GET_SIZE(table);
    }
    else if (table == Py_None) {
        /* No table: NULL here selects the identity mapping below. */
        table_chars = NULL;
        tablen = 256;
    }
    else {
        if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
            return NULL;
        table_chars = table_view.buf;
        tablen = table_view.len;
    }

    if (tablen != 256) {
        PyErr_SetString(PyExc_ValueError,
                        "translation table must be 256 characters long");
        PyBuffer_Release(&table_view);
        return NULL;
    }

    /* Step 2: locate the set of bytes to delete, if any. */
    if (deletechars != NULL) {
        if (PyBytes_Check(deletechars)) {
            del_table_chars = PyBytes_AS_STRING(deletechars);
            dellen = PyBytes_GET_SIZE(deletechars);
        }
        else {
            if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
                PyBuffer_Release(&table_view);
                return NULL;
            }
            del_table_chars = del_table_view.buf;
            dellen = del_table_view.len;
        }
    }
    else {
        del_table_chars = NULL;
        dellen = 0;
    }

    /* Step 3: the output can never be longer than the input. */
    inlen = PyBytes_GET_SIZE(input_obj);
    result = PyBytes_FromStringAndSize((char *)NULL, inlen);
    if (result == NULL) {
        PyBuffer_Release(&del_table_view);
        PyBuffer_Release(&table_view);
        return NULL;
    }
    output_start = output = PyBytes_AS_STRING(result);
    input = PyBytes_AS_STRING(input_obj);

    if (dellen == 0 && table_chars != NULL) {
        /* If no deletions are required, use faster code */
        for (i = inlen; --i >= 0; ) {
            c = Py_CHARMASK(*input++);
            /* Store the mapped byte and note whether it differs. */
            if (Py_CHARMASK((*output++ = table_chars[c])) != c)
                changed = 1;
        }
        if (!changed && PyBytes_CheckExact(input_obj)) {
            /* Identity translation: hand back the input itself. */
            Py_INCREF(input_obj);
            Py_DECREF(result);
            result = input_obj;
        }
        PyBuffer_Release(&del_table_view);
        PyBuffer_Release(&table_view);
        return result;
    }

    /* General path: copy the mapping into an int table so that -1 can be
       used as an out-of-band "delete this byte" marker. */
    if (table_chars == NULL) {
        for (i = 0; i < 256; i++)
            trans_table[i] = Py_CHARMASK(i);
    } else {
        for (i = 0; i < 256; i++)
            trans_table[i] = Py_CHARMASK(table_chars[i]);
    }
    /* The table contents have been copied; its view is no longer needed. */
    PyBuffer_Release(&table_view);

    for (i = 0; i < dellen; i++)
        trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
    PyBuffer_Release(&del_table_view);

    for (i = inlen; --i >= 0; ) {
        c = Py_CHARMASK(*input++);
        /* A deleted byte writes nothing and does not advance `output`;
           it falls through to `changed = 1`, as does a byte whose mapped
           value differs from the original. */
        if (trans_table[c] != -1)
            if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
                continue;
        changed = 1;
    }
    if (!changed && PyBytes_CheckExact(input_obj)) {
        Py_DECREF(result);
        Py_INCREF(input_obj);
        return input_obj;
    }
    /* Fix the size of the resulting byte string */
    if (inlen > 0)
        /* The return value is not checked; `result` is returned as
           _PyBytes_Resize() left it.  NOTE(review): presumably it sets
           `result` to NULL on failure -- confirm. */
        _PyBytes_Resize(&result, output - output_start);
    return result;
}
2145
2146
2147 /*[clinic input]
2148
2149 @staticmethod
2150 bytes.maketrans
2151
2152 frm: Py_buffer
2153 to: Py_buffer
2154 /
2155
2156 Return a translation table useable for the bytes or bytearray translate method.
2157
2158 The returned table will be one where each byte in frm is mapped to the byte at
2159 the same position in to.
2160
2161 The bytes objects frm and to must be of the same length.
2162 [clinic start generated code]*/
2163
2164 static PyObject *
bytes_maketrans_impl(Py_buffer * frm,Py_buffer * to)2165 bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2166 /*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
2167 {
2168 return _Py_bytes_maketrans(frm, to);
2169 }
2170
2171
2172 /*[clinic input]
2173 bytes.replace
2174
2175 old: Py_buffer
2176 new: Py_buffer
2177 count: Py_ssize_t = -1
2178 Maximum number of occurrences to replace.
2179 -1 (the default value) means replace all occurrences.
2180 /
2181
2182 Return a copy with all occurrences of substring old replaced by new.
2183
2184 If the optional argument count is given, only the first count occurrences are
2185 replaced.
2186 [clinic start generated code]*/
2187
2188 static PyObject *
bytes_replace_impl(PyBytesObject * self,Py_buffer * old,Py_buffer * new,Py_ssize_t count)2189 bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2190 Py_ssize_t count)
2191 /*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
2192 {
2193 return stringlib_replace((PyObject *)self,
2194 (const char *)old->buf, old->len,
2195 (const char *)new->buf, new->len, count);
2196 }
2197
2198 /** End DALKE **/
2199
2200 /*[clinic input]
2201 bytes.removeprefix as bytes_removeprefix
2202
2203 prefix: Py_buffer
2204 /
2205
2206 Return a bytes object with the given prefix string removed if present.
2207
2208 If the bytes starts with the prefix string, return bytes[len(prefix):].
2209 Otherwise, return a copy of the original bytes.
2210 [clinic start generated code]*/
2211
2212 static PyObject *
bytes_removeprefix_impl(PyBytesObject * self,Py_buffer * prefix)2213 bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2214 /*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2215 {
2216 const char *self_start = PyBytes_AS_STRING(self);
2217 Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2218 const char *prefix_start = prefix->buf;
2219 Py_ssize_t prefix_len = prefix->len;
2220
2221 if (self_len >= prefix_len
2222 && prefix_len > 0
2223 && memcmp(self_start, prefix_start, prefix_len) == 0)
2224 {
2225 return PyBytes_FromStringAndSize(self_start + prefix_len,
2226 self_len - prefix_len);
2227 }
2228
2229 if (PyBytes_CheckExact(self)) {
2230 Py_INCREF(self);
2231 return (PyObject *)self;
2232 }
2233
2234 return PyBytes_FromStringAndSize(self_start, self_len);
2235 }
2236
2237 /*[clinic input]
2238 bytes.removesuffix as bytes_removesuffix
2239
2240 suffix: Py_buffer
2241 /
2242
2243 Return a bytes object with the given suffix string removed if present.
2244
2245 If the bytes ends with the suffix string and that suffix is not empty,
2246 return bytes[:-len(prefix)]. Otherwise, return a copy of the original
2247 bytes.
2248 [clinic start generated code]*/
2249
2250 static PyObject *
bytes_removesuffix_impl(PyBytesObject * self,Py_buffer * suffix)2251 bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2252 /*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2253 {
2254 const char *self_start = PyBytes_AS_STRING(self);
2255 Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2256 const char *suffix_start = suffix->buf;
2257 Py_ssize_t suffix_len = suffix->len;
2258
2259 if (self_len >= suffix_len
2260 && suffix_len > 0
2261 && memcmp(self_start + self_len - suffix_len,
2262 suffix_start, suffix_len) == 0)
2263 {
2264 return PyBytes_FromStringAndSize(self_start,
2265 self_len - suffix_len);
2266 }
2267
2268 if (PyBytes_CheckExact(self)) {
2269 Py_INCREF(self);
2270 return (PyObject *)self;
2271 }
2272
2273 return PyBytes_FromStringAndSize(self_start, self_len);
2274 }
2275
2276 static PyObject *
bytes_startswith(PyBytesObject * self,PyObject * args)2277 bytes_startswith(PyBytesObject *self, PyObject *args)
2278 {
2279 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2280 }
2281
2282 static PyObject *
bytes_endswith(PyBytesObject * self,PyObject * args)2283 bytes_endswith(PyBytesObject *self, PyObject *args)
2284 {
2285 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2286 }
2287
2288
2289 /*[clinic input]
2290 bytes.decode
2291
2292 encoding: str(c_default="NULL") = 'utf-8'
2293 The encoding with which to decode the bytes.
2294 errors: str(c_default="NULL") = 'strict'
2295 The error handling scheme to use for the handling of decoding errors.
2296 The default is 'strict' meaning that decoding errors raise a
2297 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2298 as well as any other name registered with codecs.register_error that
2299 can handle UnicodeDecodeErrors.
2300
2301 Decode the bytes using the codec registered for encoding.
2302 [clinic start generated code]*/
2303
2304 static PyObject *
bytes_decode_impl(PyBytesObject * self,const char * encoding,const char * errors)2305 bytes_decode_impl(PyBytesObject *self, const char *encoding,
2306 const char *errors)
2307 /*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2308 {
2309 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2310 }
2311
2312
2313 /*[clinic input]
2314 bytes.splitlines
2315
2316 keepends: bool(accept={int}) = False
2317
2318 Return a list of the lines in the bytes, breaking at line boundaries.
2319
2320 Line breaks are not included in the resulting list unless keepends is given and
2321 true.
2322 [clinic start generated code]*/
2323
2324 static PyObject *
bytes_splitlines_impl(PyBytesObject * self,int keepends)2325 bytes_splitlines_impl(PyBytesObject *self, int keepends)
2326 /*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
2327 {
2328 return stringlib_splitlines(
2329 (PyObject*) self, PyBytes_AS_STRING(self),
2330 PyBytes_GET_SIZE(self), keepends
2331 );
2332 }
2333
2334 /*[clinic input]
2335 @classmethod
2336 bytes.fromhex
2337
2338 string: unicode
2339 /
2340
2341 Create a bytes object from a string of hexadecimal numbers.
2342
2343 Spaces between two numbers are accepted.
2344 Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2345 [clinic start generated code]*/
2346
2347 static PyObject *
bytes_fromhex_impl(PyTypeObject * type,PyObject * string)2348 bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2349 /*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
2350 {
2351 PyObject *result = _PyBytes_FromHex(string, 0);
2352 if (type != &PyBytes_Type && result != NULL) {
2353 Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
2354 }
2355 return result;
2356 }
2357
2358 PyObject*
_PyBytes_FromHex(PyObject * string,int use_bytearray)2359 _PyBytes_FromHex(PyObject *string, int use_bytearray)
2360 {
2361 char *buf;
2362 Py_ssize_t hexlen, invalid_char;
2363 unsigned int top, bot;
2364 const Py_UCS1 *str, *end;
2365 _PyBytesWriter writer;
2366
2367 _PyBytesWriter_Init(&writer);
2368 writer.use_bytearray = use_bytearray;
2369
2370 assert(PyUnicode_Check(string));
2371 if (PyUnicode_READY(string))
2372 return NULL;
2373 hexlen = PyUnicode_GET_LENGTH(string);
2374
2375 if (!PyUnicode_IS_ASCII(string)) {
2376 const void *data = PyUnicode_DATA(string);
2377 unsigned int kind = PyUnicode_KIND(string);
2378 Py_ssize_t i;
2379
2380 /* search for the first non-ASCII character */
2381 for (i = 0; i < hexlen; i++) {
2382 if (PyUnicode_READ(kind, data, i) >= 128)
2383 break;
2384 }
2385 invalid_char = i;
2386 goto error;
2387 }
2388
2389 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2390 str = PyUnicode_1BYTE_DATA(string);
2391
2392 /* This overestimates if there are spaces */
2393 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2394 if (buf == NULL)
2395 return NULL;
2396
2397 end = str + hexlen;
2398 while (str < end) {
2399 /* skip over spaces in the input */
2400 if (Py_ISSPACE(*str)) {
2401 do {
2402 str++;
2403 } while (Py_ISSPACE(*str));
2404 if (str >= end)
2405 break;
2406 }
2407
2408 top = _PyLong_DigitValue[*str];
2409 if (top >= 16) {
2410 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2411 goto error;
2412 }
2413 str++;
2414
2415 bot = _PyLong_DigitValue[*str];
2416 if (bot >= 16) {
2417 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2418 goto error;
2419 }
2420 str++;
2421
2422 *buf++ = (unsigned char)((top << 4) + bot);
2423 }
2424
2425 return _PyBytesWriter_Finish(&writer, buf);
2426
2427 error:
2428 PyErr_Format(PyExc_ValueError,
2429 "non-hexadecimal number found in "
2430 "fromhex() arg at position %zd", invalid_char);
2431 _PyBytesWriter_Dealloc(&writer);
2432 return NULL;
2433 }
2434
2435 /*[clinic input]
2436 bytes.hex
2437
2438 sep: object = NULL
2439 An optional single character or byte to separate hex bytes.
2440 bytes_per_sep: int = 1
2441 How many bytes between separators. Positive values count from the
2442 right, negative values count from the left.
2443
2444 Create a string of hexadecimal numbers from a bytes object.
2445
2446 Example:
2447 >>> value = b'\xb9\x01\xef'
2448 >>> value.hex()
2449 'b901ef'
2450 >>> value.hex(':')
2451 'b9:01:ef'
2452 >>> value.hex(':', 2)
2453 'b9:01ef'
2454 >>> value.hex(':', -2)
2455 'b901:ef'
2456 [clinic start generated code]*/
2457
2458 static PyObject *
bytes_hex_impl(PyBytesObject * self,PyObject * sep,int bytes_per_sep)2459 bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2460 /*[clinic end generated code: output=1f134da504064139 input=1a21282b1f1ae595]*/
2461 {
2462 const char *argbuf = PyBytes_AS_STRING(self);
2463 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2464 return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
2465 }
2466
2467 static PyObject *
bytes_getnewargs(PyBytesObject * v,PyObject * Py_UNUSED (ignored))2468 bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
2469 {
2470 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
2471 }
2472
2473
/* Method table of the bytes type (installed as tp_methods of PyBytes_Type).
   Entries written as *_METHODDEF are macros that expand to a complete
   table entry; the remaining entries are spelled out by hand.  The table
   is terminated by a NULL sentinel. */
static PyMethodDef
bytes_methods[] = {
    {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
    {"capitalize", stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    STRINGLIB_CENTER_METHODDEF
    {"count", (PyCFunction)bytes_count, METH_VARARGS,
     _Py_count__doc__},
    BYTES_DECODE_METHODDEF
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
     _Py_endswith__doc__},
    STRINGLIB_EXPANDTABS_METHODDEF
    {"find", (PyCFunction)bytes_find, METH_VARARGS,
     _Py_find__doc__},
    BYTES_FROMHEX_METHODDEF
    BYTES_HEX_METHODDEF
    {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
    {"isalnum", stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isascii", stringlib_isascii, METH_NOARGS,
     _Py_isascii__doc__},
    {"isdigit", stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    BYTES_JOIN_METHODDEF
    STRINGLIB_LJUST_METHODDEF
    {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    BYTES_LSTRIP_METHODDEF
    BYTES_MAKETRANS_METHODDEF
    BYTES_PARTITION_METHODDEF
    BYTES_REPLACE_METHODDEF
    BYTES_REMOVEPREFIX_METHODDEF
    BYTES_REMOVESUFFIX_METHODDEF
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
    STRINGLIB_RJUST_METHODDEF
    BYTES_RPARTITION_METHODDEF
    BYTES_RSPLIT_METHODDEF
    BYTES_RSTRIP_METHODDEF
    BYTES_SPLIT_METHODDEF
    BYTES_SPLITLINES_METHODDEF
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
     _Py_startswith__doc__},
    BYTES_STRIP_METHODDEF
    {"swapcase", stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
    BYTES_TRANSLATE_METHODDEF
    {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    STRINGLIB_ZFILL_METHODDEF
    {NULL,     NULL}                         /* sentinel */
};
2535
2536 static PyObject *
bytes_mod(PyObject * self,PyObject * arg)2537 bytes_mod(PyObject *self, PyObject *arg)
2538 {
2539 if (!PyBytes_Check(self)) {
2540 Py_RETURN_NOTIMPLEMENTED;
2541 }
2542 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2543 arg, 0);
2544 }
2545
2546 static PyNumberMethods bytes_as_number = {
2547 0, /*nb_add*/
2548 0, /*nb_subtract*/
2549 0, /*nb_multiply*/
2550 bytes_mod, /*nb_remainder*/
2551 };
2552
/* Forward declaration: the definition comes later in this file, but
   bytes_new_impl() below calls it to build instances of bytes subclasses. */
static PyObject *
bytes_subtype_new(PyTypeObject *, PyObject *);
2555
2556 /*[clinic input]
2557 @classmethod
2558 bytes.__new__ as bytes_new
2559
2560 source as x: object = NULL
2561 encoding: str = NULL
2562 errors: str = NULL
2563
2564 [clinic start generated code]*/
2565
static PyObject *
bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
               const char *errors)
/*[clinic end generated code: output=1e0c471be311a425 input=f0a966d19b7262b4]*/
{
    /* Implementation of bytes.__new__(source, encoding, errors).

       The source `x` is dispatched on, in this order:
         1. no source            -> empty bytes (encoding/errors rejected)
         2. encoding given       -> x must be str; encode it
         3. errors w/o encoding  -> TypeError
         4. x has __bytes__      -> call it; result must be bytes
         5. x is str             -> TypeError (an encoding is required)
         6. x supports __index__ -> zero-filled bytes of that size
         7. anything else        -> PyBytes_FromObject(x)

       When `type` is not exactly PyBytes_Type, the resulting bytes object
       is finally copied into an instance of the subclass.
       Returns a new reference, or NULL with an exception set. */
    PyObject *bytes;
    PyObject *func;
    Py_ssize_t size;

    if (x == NULL) {
        if (encoding != NULL || errors != NULL) {
            /* The message names whichever argument was actually given,
               preferring encoding when both are present. */
            PyErr_SetString(PyExc_TypeError,
                            encoding != NULL ?
                            "encoding without a string argument" :
                            "errors without a string argument");
            return NULL;
        }
        bytes = PyBytes_FromStringAndSize(NULL, 0);
    }
    else if (encoding != NULL) {
        /* Encode via the codec registry */
        if (!PyUnicode_Check(x)) {
            PyErr_SetString(PyExc_TypeError,
                            "encoding without a string argument");
            return NULL;
        }
        bytes = PyUnicode_AsEncodedString(x, encoding, errors);
    }
    else if (errors != NULL) {
        /* errors without encoding is always an error; only the message
           depends on whether x is a str. */
        PyErr_SetString(PyExc_TypeError,
                        PyUnicode_Check(x) ?
                        "string argument without an encoding" :
                        "errors without a string argument");
        return NULL;
    }
    /* We'd like to call PyObject_Bytes here, but we need to check for an
       integer argument before deferring to PyBytes_FromObject, something
       PyObject_Bytes doesn't do. */
    else if ((func = _PyObject_LookupSpecial(x, &PyId___bytes__)) != NULL) {
        bytes = _PyObject_CallNoArg(func);
        Py_DECREF(func);
        if (bytes == NULL)
            return NULL;
        if (!PyBytes_Check(bytes)) {
            PyErr_Format(PyExc_TypeError,
                        "__bytes__ returned non-bytes (type %.200s)",
                        Py_TYPE(bytes)->tp_name);
            Py_DECREF(bytes);
            return NULL;
        }
    }
    /* The lookup above returned NULL: stop here if that was because of an
       error rather than a missing __bytes__. */
    else if (PyErr_Occurred())
        return NULL;
    else if (PyUnicode_Check(x)) {
        PyErr_SetString(PyExc_TypeError,
                        "string argument without an encoding");
        return NULL;
    }
    /* Is it an integer? */
    else if (_PyIndex_Check(x)) {
        size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
        if (size == -1 && PyErr_Occurred()) {
            /* Only a TypeError from the conversion is tolerated; in that
               case x is retried through the generic conversion. */
            if (!PyErr_ExceptionMatches(PyExc_TypeError))
                return NULL;
            PyErr_Clear();  /* fall through */
            bytes = PyBytes_FromObject(x);
        }
        else {
            if (size < 0) {
                PyErr_SetString(PyExc_ValueError, "negative count");
                return NULL;
            }
            bytes = _PyBytes_FromSize(size, 1);
        }
    }
    else {
        bytes = PyBytes_FromObject(x);
    }

    /* Subclass requested: replace the exact bytes object by a subclass
       instance holding the same data (NULL on failure). */
    if (bytes != NULL && type != &PyBytes_Type) {
        Py_SETREF(bytes, bytes_subtype_new(type, bytes));
    }

    return bytes;
}
2651
2652 static PyObject*
_PyBytes_FromBuffer(PyObject * x)2653 _PyBytes_FromBuffer(PyObject *x)
2654 {
2655 PyObject *new;
2656 Py_buffer view;
2657
2658 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2659 return NULL;
2660
2661 new = PyBytes_FromStringAndSize(NULL, view.len);
2662 if (!new)
2663 goto fail;
2664 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2665 &view, view.len, 'C') < 0)
2666 goto fail;
2667 PyBuffer_Release(&view);
2668 return new;
2669
2670 fail:
2671 Py_XDECREF(new);
2672 PyBuffer_Release(&view);
2673 return NULL;
2674 }
2675
/* Build a bytes object from a list whose items are converted with
   PyNumber_AsSsize_t() and must each lie in range(0, 256).
   The caller must pass a list (the PyList_GET_* macros are used unchecked).
   Return a new reference, or NULL with an exception set. */
static PyObject*
_PyBytes_FromList(PyObject *x)
{
    Py_ssize_t i, size = PyList_GET_SIZE(x);
    Py_ssize_t value;
    char *str;
    PyObject *item;
    _PyBytesWriter writer;

    _PyBytesWriter_Init(&writer);
    str = _PyBytesWriter_Alloc(&writer, size);
    if (str == NULL)
        return NULL;
    writer.overallocate = 1;
    /* From here on `size` is the writer's capacity, not the list length. */
    size = writer.allocated;

    /* The list length is re-read on every iteration and each item is held
       with a temporary reference during conversion.
       NOTE(review): presumably because the conversion can run arbitrary
       code that mutates the list -- confirm. */
    for (i = 0; i < PyList_GET_SIZE(x); i++) {
        item = PyList_GET_ITEM(x, i);
        Py_INCREF(item);
        value = PyNumber_AsSsize_t(item, NULL);
        Py_DECREF(item);
        if (value == -1 && PyErr_Occurred())
            goto error;

        if (value < 0 || value >= 256) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto error;
        }

        /* Grow the buffer when the index reaches the current capacity. */
        if (i >= size) {
            str = _PyBytesWriter_Resize(&writer, str, size+1);
            /* On failure _PyBytesWriter_Resize() has already released the
               writer, so no further cleanup is needed here. */
            if (str == NULL)
                return NULL;
            size = writer.allocated;
        }
        *str++ = (char) value;
    }
    return _PyBytesWriter_Finish(&writer, str);

  error:
    _PyBytesWriter_Dealloc(&writer);
    return NULL;
}
2720
2721 static PyObject*
_PyBytes_FromTuple(PyObject * x)2722 _PyBytes_FromTuple(PyObject *x)
2723 {
2724 PyObject *bytes;
2725 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2726 Py_ssize_t value;
2727 char *str;
2728 PyObject *item;
2729
2730 bytes = PyBytes_FromStringAndSize(NULL, size);
2731 if (bytes == NULL)
2732 return NULL;
2733 str = ((PyBytesObject *)bytes)->ob_sval;
2734
2735 for (i = 0; i < size; i++) {
2736 item = PyTuple_GET_ITEM(x, i);
2737 value = PyNumber_AsSsize_t(item, NULL);
2738 if (value == -1 && PyErr_Occurred())
2739 goto error;
2740
2741 if (value < 0 || value >= 256) {
2742 PyErr_SetString(PyExc_ValueError,
2743 "bytes must be in range(0, 256)");
2744 goto error;
2745 }
2746 *str++ = (char) value;
2747 }
2748 return bytes;
2749
2750 error:
2751 Py_DECREF(bytes);
2752 return NULL;
2753 }
2754
/* Build a bytes object by exhausting the iterator `it`; every produced item
   is converted with PyNumber_AsSsize_t() and must lie in range(0, 256).
   `x` is only used to obtain a length hint for the initial allocation.
   Return a new reference, or NULL with an exception set. */
static PyObject *
_PyBytes_FromIterator(PyObject *it, PyObject *x)
{
    char *str;
    Py_ssize_t i, size;
    _PyBytesWriter writer;

    /* For iterator version, create a bytes object and resize as needed */
    size = PyObject_LengthHint(x, 64);
    if (size == -1 && PyErr_Occurred())
        return NULL;

    _PyBytesWriter_Init(&writer);
    str = _PyBytesWriter_Alloc(&writer, size);
    if (str == NULL)
        return NULL;
    writer.overallocate = 1;
    /* From here on `size` is the writer's capacity, not the length hint. */
    size = writer.allocated;

    /* Run the iterator to exhaustion */
    for (i = 0; ; i++) {
        PyObject *item;
        Py_ssize_t value;

        /* Get the next item */
        item = PyIter_Next(it);
        if (item == NULL) {
            /* NULL means either an error or normal exhaustion. */
            if (PyErr_Occurred())
                goto error;
            break;
        }

        /* Interpret it as an int (__index__) */
        value = PyNumber_AsSsize_t(item, NULL);
        Py_DECREF(item);
        if (value == -1 && PyErr_Occurred())
            goto error;

        /* Range check */
        if (value < 0 || value >= 256) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto error;
        }

        /* Append the byte */
        if (i >= size) {
            str = _PyBytesWriter_Resize(&writer, str, size+1);
            /* On failure _PyBytesWriter_Resize() has already released the
               writer, so no further cleanup is needed here. */
            if (str == NULL)
                return NULL;
            size = writer.allocated;
        }
        *str++ = (char) value;
    }

    return _PyBytesWriter_Finish(&writer, str);

  error:
    _PyBytesWriter_Dealloc(&writer);
    return NULL;
}
2816
2817 PyObject *
PyBytes_FromObject(PyObject * x)2818 PyBytes_FromObject(PyObject *x)
2819 {
2820 PyObject *it, *result;
2821
2822 if (x == NULL) {
2823 PyErr_BadInternalCall();
2824 return NULL;
2825 }
2826
2827 if (PyBytes_CheckExact(x)) {
2828 Py_INCREF(x);
2829 return x;
2830 }
2831
2832 /* Use the modern buffer interface */
2833 if (PyObject_CheckBuffer(x))
2834 return _PyBytes_FromBuffer(x);
2835
2836 if (PyList_CheckExact(x))
2837 return _PyBytes_FromList(x);
2838
2839 if (PyTuple_CheckExact(x))
2840 return _PyBytes_FromTuple(x);
2841
2842 if (!PyUnicode_Check(x)) {
2843 it = PyObject_GetIter(x);
2844 if (it != NULL) {
2845 result = _PyBytes_FromIterator(it, x);
2846 Py_DECREF(it);
2847 return result;
2848 }
2849 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2850 return NULL;
2851 }
2852 }
2853
2854 PyErr_Format(PyExc_TypeError,
2855 "cannot convert '%.200s' object to bytes",
2856 Py_TYPE(x)->tp_name);
2857 return NULL;
2858 }
2859
2860 static PyObject *
bytes_subtype_new(PyTypeObject * type,PyObject * tmp)2861 bytes_subtype_new(PyTypeObject *type, PyObject *tmp)
2862 {
2863 PyObject *pnew;
2864 Py_ssize_t n;
2865
2866 assert(PyType_IsSubtype(type, &PyBytes_Type));
2867 assert(PyBytes_Check(tmp));
2868 n = PyBytes_GET_SIZE(tmp);
2869 pnew = type->tp_alloc(type, n);
2870 if (pnew != NULL) {
2871 memcpy(PyBytes_AS_STRING(pnew),
2872 PyBytes_AS_STRING(tmp), n+1);
2873 ((PyBytesObject *)pnew)->ob_shash =
2874 ((PyBytesObject *)tmp)->ob_shash;
2875 }
2876 return pnew;
2877 }
2878
2879 PyDoc_STRVAR(bytes_doc,
2880 "bytes(iterable_of_ints) -> bytes\n\
2881 bytes(string, encoding[, errors]) -> bytes\n\
2882 bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
2883 bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2884 bytes() -> empty bytes object\n\
2885 \n\
2886 Construct an immutable array of bytes from:\n\
2887 - an iterable yielding integers in range(256)\n\
2888 - a text string encoded using the specified encoding\n\
2889 - any object implementing the buffer API.\n\
2890 - an integer");
2891
/* Forward declaration: bytes_iter() is defined with the bytes iterator
   further down, but is needed for the tp_iter slot of PyBytes_Type. */
static PyObject *bytes_iter(PyObject *seq);
2893
/* Type object of bytes.  Slots are filled positionally; the trailing
   comment on each line names the slot. */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                                    /* tp_name */
    PyBytesObject_SIZE,                         /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_vectorcall_offset */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_as_async */
    (reprfunc)bytes_repr,                       /* tp_repr */
    &bytes_as_number,                           /* tp_as_number */
    &bytes_as_sequence,                         /* tp_as_sequence */
    &bytes_as_mapping,                          /* tp_as_mapping */
    (hashfunc)bytes_hash,                       /* tp_hash */
    0,                                          /* tp_call */
    bytes_str,                                  /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &bytes_as_buffer,                           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_BYTES_SUBCLASS |
        _Py_TPFLAGS_MATCH_SELF,               /* tp_flags */
    bytes_doc,                                  /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    bytes_iter,                                 /* tp_iter */
    0,                                          /* tp_iternext */
    bytes_methods,                              /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    bytes_new,                                  /* tp_new */
    PyObject_Del,                               /* tp_free */
};
2937
/* Append the contents of w to the bytes object *pv.

   On success *pv refers to the concatenated object (resized in place when
   *pv was the only reference to an exact bytes object, a new object
   otherwise).  On failure the reference in *pv is released and *pv is set
   to NULL.  If *pv is already NULL nothing happens; if w is NULL, *pv is
   simply cleared.  The reference to w is never consumed. */
void
PyBytes_Concat(PyObject **pv, PyObject *w)
{
    assert(pv != NULL);
    if (*pv == NULL)
        return;
    if (w == NULL) {
        Py_CLEAR(*pv);
        return;
    }

    if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
        /* Only one reference, so we can resize in place */
        Py_ssize_t oldsize;
        Py_buffer wb;

        if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
            PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
                         Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
            Py_CLEAR(*pv);
            return;
        }

        oldsize = PyBytes_GET_SIZE(*pv);
        /* Guard the addition below against Py_ssize_t overflow. */
        if (oldsize > PY_SSIZE_T_MAX - wb.len) {
            PyErr_NoMemory();
            goto error;
        }
        if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
            goto error;

        /* Copy w's bytes right after the original content. */
        memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
        PyBuffer_Release(&wb);
        return;

      error:
        /* The buffer was acquired, so it must be released before bailing
           out. */
        PyBuffer_Release(&wb);
        Py_CLEAR(*pv);
        return;
    }

    else {
        /* Multiple references, need to create new object */
        PyObject *v;
        v = bytes_concat(*pv, w);
        /* v may be NULL on failure, which leaves *pv NULL as documented. */
        Py_SETREF(*pv, v);
    }
}
2986
2987 void
PyBytes_ConcatAndDel(PyObject ** pv,PyObject * w)2988 PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
2989 {
2990 PyBytes_Concat(pv, w);
2991 Py_XDECREF(w);
2992 }
2993
2994
2995 /* The following function breaks the notion that bytes are immutable:
2996 it changes the size of a bytes object. We get away with this only if there
2997 is only one module referencing the object. You can also think of it
2998 as creating a new bytes object and destroying the old one, only
2999 more efficiently. In any case, don't use this if the bytes object may
3000 already be known to some other part of the code...
3001 Note that if there's not enough memory to resize the bytes object, the
3002 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
3003 memory" exception is set, and -1 is returned. Else (on success) 0 is
3004 returned, and the value in *pv may or may not be the same as on input.
3005 As always, an extra byte is allocated for a trailing \0 byte (newsize
3006 does *not* include that), and a trailing \0 byte is stored.
3007 */
3008
3009 int
_PyBytes_Resize(PyObject ** pv,Py_ssize_t newsize)3010 _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3011 {
3012 PyObject *v;
3013 PyBytesObject *sv;
3014 v = *pv;
3015 if (!PyBytes_Check(v) || newsize < 0) {
3016 goto error;
3017 }
3018 if (Py_SIZE(v) == newsize) {
3019 /* return early if newsize equals to v->ob_size */
3020 return 0;
3021 }
3022 if (Py_SIZE(v) == 0) {
3023 if (newsize == 0) {
3024 return 0;
3025 }
3026 *pv = _PyBytes_FromSize(newsize, 0);
3027 Py_DECREF(v);
3028 return (*pv == NULL) ? -1 : 0;
3029 }
3030 if (Py_REFCNT(v) != 1) {
3031 goto error;
3032 }
3033 if (newsize == 0) {
3034 *pv = bytes_new_empty();
3035 Py_DECREF(v);
3036 return 0;
3037 }
3038 /* XXX UNREF/NEWREF interface should be more symmetrical */
3039 #ifdef Py_REF_DEBUG
3040 _Py_RefTotal--;
3041 #endif
3042 #ifdef Py_TRACE_REFS
3043 _Py_ForgetReference(v);
3044 #endif
3045 *pv = (PyObject *)
3046 PyObject_Realloc(v, PyBytesObject_SIZE + newsize);
3047 if (*pv == NULL) {
3048 PyObject_Free(v);
3049 PyErr_NoMemory();
3050 return -1;
3051 }
3052 _Py_NewReference(*pv);
3053 sv = (PyBytesObject *) *pv;
3054 Py_SET_SIZE(sv, newsize);
3055 sv->ob_sval[newsize] = '\0';
3056 sv->ob_shash = -1; /* invalidate cached hash value */
3057 return 0;
3058 error:
3059 *pv = 0;
3060 Py_DECREF(v);
3061 PyErr_BadInternalCall();
3062 return -1;
3063 }
3064
3065
3066 PyStatus
_PyBytes_Init(PyInterpreterState * interp)3067 _PyBytes_Init(PyInterpreterState *interp)
3068 {
3069 struct _Py_bytes_state *state = &interp->bytes;
3070 if (bytes_create_empty_string_singleton(state) < 0) {
3071 return _PyStatus_NO_MEMORY();
3072 }
3073 return _PyStatus_OK();
3074 }
3075
3076
3077 void
_PyBytes_Fini(PyInterpreterState * interp)3078 _PyBytes_Fini(PyInterpreterState *interp)
3079 {
3080 struct _Py_bytes_state* state = &interp->bytes;
3081 for (int i = 0; i < UCHAR_MAX + 1; i++) {
3082 Py_CLEAR(state->characters[i]);
3083 }
3084 Py_CLEAR(state->empty_string);
3085 }
3086
3087 /*********************** Bytes Iterator ****************************/
3088
/* State of a bytes iterator: the bytes object being iterated (a strong
   reference) and the index of the next byte to return. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;    /* index of the next byte to yield */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
3094
/* tp_dealloc of the bytes iterator: stop GC tracking, drop the reference
   to the iterated bytes object (if still held) and free the iterator. */
static void
striter_dealloc(striterobject *it)
{
    _PyObject_GC_UNTRACK(it);
    Py_XDECREF(it->it_seq);
    PyObject_GC_Del(it);
}
3102
/* tp_traverse of the bytes iterator: the iterated bytes object is the only
   reference the iterator holds. */
static int
striter_traverse(striterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
3109
3110 static PyObject *
striter_next(striterobject * it)3111 striter_next(striterobject *it)
3112 {
3113 PyBytesObject *seq;
3114
3115 assert(it != NULL);
3116 seq = it->it_seq;
3117 if (seq == NULL)
3118 return NULL;
3119 assert(PyBytes_Check(seq));
3120
3121 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3122 return PyLong_FromLong(
3123 (unsigned char)seq->ob_sval[it->it_index++]);
3124 }
3125
3126 it->it_seq = NULL;
3127 Py_DECREF(seq);
3128 return NULL;
3129 }
3130
3131 static PyObject *
striter_len(striterobject * it,PyObject * Py_UNUSED (ignored))3132 striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
3133 {
3134 Py_ssize_t len = 0;
3135 if (it->it_seq)
3136 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3137 return PyLong_FromSsize_t(len);
3138 }
3139
3140 PyDoc_STRVAR(length_hint_doc,
3141 "Private method returning an estimate of len(list(it)).");
3142
3143 static PyObject *
striter_reduce(striterobject * it,PyObject * Py_UNUSED (ignored))3144 striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
3145 {
3146 _Py_IDENTIFIER(iter);
3147 if (it->it_seq != NULL) {
3148 return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
3149 it->it_seq, it->it_index);
3150 } else {
3151 return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
3152 }
3153 }
3154
3155 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3156
3157 static PyObject *
striter_setstate(striterobject * it,PyObject * state)3158 striter_setstate(striterobject *it, PyObject *state)
3159 {
3160 Py_ssize_t index = PyLong_AsSsize_t(state);
3161 if (index == -1 && PyErr_Occurred())
3162 return NULL;
3163 if (it->it_seq != NULL) {
3164 if (index < 0)
3165 index = 0;
3166 else if (index > PyBytes_GET_SIZE(it->it_seq))
3167 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3168 it->it_index = index;
3169 }
3170 Py_RETURN_NONE;
3171 }
3172
3173 PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3174
/* Method table of the bytes iterator: length hint plus the two hooks used
   for pickling.  Terminated by a NULL sentinel. */
static PyMethodDef striter_methods[] = {
    {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
     length_hint_doc},
    {"__reduce__",      (PyCFunction)striter_reduce, METH_NOARGS,
     reduce_doc},
    {"__setstate__",    (PyCFunction)striter_setstate, METH_O,
     setstate_doc},
    {NULL,              NULL}           /* sentinel */
};
3184
/* Type object of the bytes iterator.  Slots are filled positionally; the
   trailing comment on each line names the slot. */
PyTypeObject PyBytesIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes_iterator",                           /* tp_name */
    sizeof(striterobject),                      /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc,                /* tp_dealloc */
    0,                                          /* tp_vectorcall_offset */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_as_async */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)striter_traverse,     /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)striter_next,                 /* tp_iternext */
    striter_methods,                            /* tp_methods */
    0,                                          /* tp_members */
};
3217
3218 static PyObject *
bytes_iter(PyObject * seq)3219 bytes_iter(PyObject *seq)
3220 {
3221 striterobject *it;
3222
3223 if (!PyBytes_Check(seq)) {
3224 PyErr_BadInternalCall();
3225 return NULL;
3226 }
3227 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3228 if (it == NULL)
3229 return NULL;
3230 it->it_index = 0;
3231 Py_INCREF(seq);
3232 it->it_seq = (PyBytesObject *)seq;
3233 _PyObject_GC_TRACK(it);
3234 return (PyObject *)it;
3235 }
3236
3237
3238 /* _PyBytesWriter API */
3239
/* OVERALLOCATE_FACTOR is a divisor: when overallocation is enabled,
   _PyBytesWriter_Resize() requests size + size / OVERALLOCATE_FACTOR
   bytes. */
#ifdef MS_WINDOWS
   /* On Windows, overallocating by 50% (size / 2) is the best factor */
# define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% (size / 4) is the best factor;
      the same value is used on every other non-Windows platform */
# define OVERALLOCATE_FACTOR 4
#endif
3247
3248 void
_PyBytesWriter_Init(_PyBytesWriter * writer)3249 _PyBytesWriter_Init(_PyBytesWriter *writer)
3250 {
3251 /* Set all attributes before small_buffer to 0 */
3252 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
3253 #ifndef NDEBUG
3254 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3255 sizeof(writer->small_buffer));
3256 #endif
3257 }
3258
/* Release the heap buffer owned by the writer, if any, and reset the
   pointer to NULL.  Safe to call when no heap buffer was ever allocated. */
void
_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
{
    Py_CLEAR(writer->buffer);
}
3264
3265 Py_LOCAL_INLINE(char*)
_PyBytesWriter_AsString(_PyBytesWriter * writer)3266 _PyBytesWriter_AsString(_PyBytesWriter *writer)
3267 {
3268 if (writer->use_small_buffer) {
3269 assert(writer->buffer == NULL);
3270 return writer->small_buffer;
3271 }
3272 else if (writer->use_bytearray) {
3273 assert(writer->buffer != NULL);
3274 return PyByteArray_AS_STRING(writer->buffer);
3275 }
3276 else {
3277 assert(writer->buffer != NULL);
3278 return PyBytes_AS_STRING(writer->buffer);
3279 }
3280 }
3281
3282 Py_LOCAL_INLINE(Py_ssize_t)
_PyBytesWriter_GetSize(_PyBytesWriter * writer,char * str)3283 _PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
3284 {
3285 const char *start = _PyBytesWriter_AsString(writer);
3286 assert(str != NULL);
3287 assert(str >= start);
3288 assert(str - start <= writer->allocated);
3289 return str - start;
3290 }
3291
#ifndef NDEBUG
/* Debug-only sanity check of the writer invariants for the write position
   str.  Every check is an assert(); the function returns 1 so that it can
   itself be wrapped in assert() by callers. */
Py_LOCAL_INLINE(int)
_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
{
    const char *start, *end;

    /* Storage is either the small buffer (no heap object) or exactly one
       uniquely referenced bytes/bytearray object. */
    if (writer->use_small_buffer) {
        assert(writer->buffer == NULL);
    }
    else {
        assert(writer->buffer != NULL);
        if (writer->use_bytearray)
            assert(PyByteArray_CheckExact(writer->buffer));
        else
            assert(PyBytes_CheckExact(writer->buffer));
        assert(Py_REFCNT(writer->buffer) == 1);
    }

    if (writer->use_bytearray) {
        /* bytearray has its own overallocation algorithm,
           writer overallocation must be disabled */
        assert(!writer->overallocate);
    }

    assert(0 <= writer->allocated);
    assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
    /* the last byte must always be null */
    start = _PyBytesWriter_AsString(writer);
    assert(start[writer->allocated] == 0);

    /* The write position must lie within [start, start + allocated]. */
    end = start + writer->allocated;
    assert(str != NULL);
    assert(start <= str && str <= end);
    return 1;
}
#endif
3328
/* Grow the writer's storage so that it can hold at least `size` bytes
   (more when overallocation is enabled), preserving what was written so
   far.  `str` is the current write position; `size` must be larger than
   the current capacity.

   Return the write position inside the new storage.  On failure, release
   the writer with _PyBytesWriter_Dealloc() and return NULL. */
void*
_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
{
    Py_ssize_t allocated, pos;

    assert(_PyBytesWriter_CheckConsistency(writer, str));
    assert(writer->allocated < size);

    allocated = size;
    /* The second condition skips overallocation when adding the extra
       space would overflow Py_ssize_t. */
    if (writer->overallocate
        && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
        /* overallocate to limit the number of realloc() */
        allocated += allocated / OVERALLOCATE_FACTOR;
    }

    /* Remember the write offset: the storage may move below. */
    pos = _PyBytesWriter_GetSize(writer, str);
    if (!writer->use_small_buffer) {
        /* Already on the heap: resize the existing object. */
        if (writer->use_bytearray) {
            if (PyByteArray_Resize(writer->buffer, allocated))
                goto error;
            /* writer->allocated can be smaller than writer->buffer->ob_alloc,
               but we cannot use ob_alloc because bytes may need to be moved
               to use the whole buffer. bytearray uses an internal optimization
               to avoid moving or copying bytes when bytes are removed at the
               beginning (ex: del bytearray[:1]). */
        }
        else {
            if (_PyBytes_Resize(&writer->buffer, allocated))
                goto error;
        }
    }
    else {
        /* convert from stack buffer to bytes object buffer */
        assert(writer->buffer == NULL);

        if (writer->use_bytearray)
            writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
        else
            writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
        if (writer->buffer == NULL)
            goto error;

        /* Carry over what was already written to the small buffer. */
        if (pos != 0) {
            char *dest;
            if (writer->use_bytearray)
                dest = PyByteArray_AS_STRING(writer->buffer);
            else
                dest = PyBytes_AS_STRING(writer->buffer);
            memcpy(dest,
                      writer->small_buffer,
                      pos);
        }

        writer->use_small_buffer = 0;
#ifndef NDEBUG
        /* Debug builds overwrite the now unused small buffer with the
           same pattern _PyBytesWriter_Init() uses. */
        memset(writer->small_buffer, PYMEM_CLEANBYTE,
               sizeof(writer->small_buffer));
#endif
    }
    writer->allocated = allocated;

    /* Recompute the write position relative to the new storage. */
    str = _PyBytesWriter_AsString(writer) + pos;
    assert(_PyBytesWriter_CheckConsistency(writer, str));
    return str;

error:
    _PyBytesWriter_Dealloc(writer);
    return NULL;
}
3398
3399 void*
_PyBytesWriter_Prepare(_PyBytesWriter * writer,void * str,Py_ssize_t size)3400 _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3401 {
3402 Py_ssize_t new_min_size;
3403
3404 assert(_PyBytesWriter_CheckConsistency(writer, str));
3405 assert(size >= 0);
3406
3407 if (size == 0) {
3408 /* nothing to do */
3409 return str;
3410 }
3411
3412 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3413 PyErr_NoMemory();
3414 _PyBytesWriter_Dealloc(writer);
3415 return NULL;
3416 }
3417 new_min_size = writer->min_size + size;
3418
3419 if (new_min_size > writer->allocated)
3420 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3421
3422 writer->min_size = new_min_size;
3423 return str;
3424 }
3425
/* Allocate the buffer to write size bytes.
   Return the pointer to the beginning of buffer data.
   Raise an exception and return NULL on error.

   The writer starts out on its embedded small buffer; the request is then
   forwarded to _PyBytesWriter_Prepare(), which grows the storage when
   `size` does not fit.  Must be called exactly once per writer, after
   _PyBytesWriter_Init(). */
void*
_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
{
    /* ensure that _PyBytesWriter_Alloc() is only called once */
    assert(writer->min_size == 0 && writer->buffer == NULL);
    assert(size >= 0);

    writer->use_small_buffer = 1;
#ifndef NDEBUG
    /* Keep one byte of the small buffer for the trailing null byte. */
    writer->allocated = sizeof(writer->small_buffer) - 1;
    /* In debug mode, don't use the full small buffer because it is less
       efficient than bytes and bytearray objects to detect buffer underflow
       and buffer overflow. Use 10 bytes of the small buffer to test also
       code using the smaller buffer in debug mode.

       Don't modify the _PyBytesWriter structure (use a shorter small buffer)
       in debug mode to also be able to detect stack overflow when running
       tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
       if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
       stack overflow. */
    writer->allocated = Py_MIN(writer->allocated, 10);
    /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
       to detect buffer overflow */
    writer->small_buffer[writer->allocated] = 0;
#else
    /* Release builds use the whole small buffer. */
    writer->allocated = sizeof(writer->small_buffer);
#endif
    return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
}
3458
3459 PyObject *
_PyBytesWriter_Finish(_PyBytesWriter * writer,void * str)3460 _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
3461 {
3462 Py_ssize_t size;
3463 PyObject *result;
3464
3465 assert(_PyBytesWriter_CheckConsistency(writer, str));
3466
3467 size = _PyBytesWriter_GetSize(writer, str);
3468 if (size == 0 && !writer->use_bytearray) {
3469 Py_CLEAR(writer->buffer);
3470 /* Get the empty byte string singleton */
3471 result = PyBytes_FromStringAndSize(NULL, 0);
3472 }
3473 else if (writer->use_small_buffer) {
3474 if (writer->use_bytearray) {
3475 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3476 }
3477 else {
3478 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3479 }
3480 }
3481 else {
3482 result = writer->buffer;
3483 writer->buffer = NULL;
3484
3485 if (size != writer->allocated) {
3486 if (writer->use_bytearray) {
3487 if (PyByteArray_Resize(result, size)) {
3488 Py_DECREF(result);
3489 return NULL;
3490 }
3491 }
3492 else {
3493 if (_PyBytes_Resize(&result, size)) {
3494 assert(result == NULL);
3495 return NULL;
3496 }
3497 }
3498 }
3499 }
3500 return result;
3501 }
3502
3503 void*
_PyBytesWriter_WriteBytes(_PyBytesWriter * writer,void * ptr,const void * bytes,Py_ssize_t size)3504 _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
3505 const void *bytes, Py_ssize_t size)
3506 {
3507 char *str = (char *)ptr;
3508
3509 str = _PyBytesWriter_Prepare(writer, str, size);
3510 if (str == NULL)
3511 return NULL;
3512
3513 memcpy(str, bytes, size);
3514 str += size;
3515
3516 return str;
3517 }
3518