1 /* bytes object implementation */
2
3 #define PY_SSIZE_T_CLEAN
4
5 #include "Python.h"
6 #include "pycore_abstract.h" // _PyIndex_Check()
7 #include "pycore_bytes_methods.h"
8 #include "pycore_object.h"
9 #include "pycore_pymem.h" // PYMEM_CLEANBYTE
10
11 #include "pystrhex.h"
12 #include <stddef.h>
13
14 /*[clinic input]
15 class bytes "PyBytesObject *" "&PyBytes_Type"
16 [clinic start generated code]*/
17 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
18
19 #include "clinic/bytesobject.c.h"
20
/* Cache of one-byte bytes objects, indexed by `*str & UCHAR_MAX'.  A slot is
   NULL until PyBytes_FromString() or PyBytes_FromStringAndSize() creates the
   first one-byte object with that value; the cache then holds its own
   reference to it. */
21 static PyBytesObject *characters[UCHAR_MAX + 1];
/* Cached zero-length bytes object.  NULL until the first empty object is
   created; the cache then holds its own reference to it. */
22 static PyBytesObject *nullstring;
23
24 _Py_IDENTIFIER(__bytes__);
25
26 /* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyBytesObject_SIZE + n bytes.
28
29 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
30 3 bytes per string allocation on a typical system.
31 */
32 #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
33
34 /* Forward declaration */
35 Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
36 char *str);
37
38 /*
39 For PyBytes_FromString(), the parameter `str' points to a null-terminated
40 string containing exactly `size' bytes.
41
42 For PyBytes_FromStringAndSize(), the parameter `str' is
43 either NULL or else points to a string containing at least `size' bytes.
44 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
45 not have to be null-terminated. (Therefore it is safe to construct a
46 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
47 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
48 bytes (setting the last byte to the null terminating character) and you can
49 fill in the data yourself. If `str' is non-NULL then the resulting
50 PyBytes object must be treated as immutable and you must not fill in nor
51 alter the data yourself, since the strings may be shared.
52
53 The PyObject member `op->ob_size', which denotes the number of "extra
54 items" in a variable-size object, will contain the number of bytes
55 allocated for string data, not counting the null terminating character.
56 It is therefore equal to the `size' parameter (for
57 PyBytes_FromStringAndSize()) or the length of the string in the `str'
58 parameter (for PyBytes_FromString()).
59 */
60 static PyObject *
_PyBytes_FromSize(Py_ssize_t size,int use_calloc)61 _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
62 {
63 PyBytesObject *op;
64 assert(size >= 0);
65
66 if (size == 0 && (op = nullstring) != NULL) {
67 Py_INCREF(op);
68 return (PyObject *)op;
69 }
70
71 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
72 PyErr_SetString(PyExc_OverflowError,
73 "byte string is too large");
74 return NULL;
75 }
76
77 /* Inline PyObject_NewVar */
78 if (use_calloc)
79 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
80 else
81 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
82 if (op == NULL)
83 return PyErr_NoMemory();
84 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
85 op->ob_shash = -1;
86 if (!use_calloc)
87 op->ob_sval[size] = '\0';
88 /* empty byte string singleton */
89 if (size == 0) {
90 nullstring = op;
91 Py_INCREF(op);
92 }
93 return (PyObject *) op;
94 }
95
96 PyObject *
PyBytes_FromStringAndSize(const char * str,Py_ssize_t size)97 PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
98 {
99 PyBytesObject *op;
100 if (size < 0) {
101 PyErr_SetString(PyExc_SystemError,
102 "Negative size passed to PyBytes_FromStringAndSize");
103 return NULL;
104 }
105 if (size == 1 && str != NULL &&
106 (op = characters[*str & UCHAR_MAX]) != NULL)
107 {
108 Py_INCREF(op);
109 return (PyObject *)op;
110 }
111
112 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
113 if (op == NULL)
114 return NULL;
115 if (str == NULL)
116 return (PyObject *) op;
117
118 memcpy(op->ob_sval, str, size);
119 /* share short strings */
120 if (size == 1) {
121 characters[*str & UCHAR_MAX] = op;
122 Py_INCREF(op);
123 }
124 return (PyObject *) op;
125 }
126
127 PyObject *
PyBytes_FromString(const char * str)128 PyBytes_FromString(const char *str)
129 {
130 size_t size;
131 PyBytesObject *op;
132
133 assert(str != NULL);
134 size = strlen(str);
135 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
136 PyErr_SetString(PyExc_OverflowError,
137 "byte string is too long");
138 return NULL;
139 }
140 if (size == 0 && (op = nullstring) != NULL) {
141 Py_INCREF(op);
142 return (PyObject *)op;
143 }
144 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
148
149 /* Inline PyObject_NewVar */
150 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
151 if (op == NULL)
152 return PyErr_NoMemory();
153 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
154 op->ob_shash = -1;
155 memcpy(op->ob_sval, str, size+1);
156 /* share short strings */
157 if (size == 0) {
158 nullstring = op;
159 Py_INCREF(op);
160 } else if (size == 1) {
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
165 }
166
167 PyObject *
PyBytes_FromFormatV(const char * format,va_list vargs)168 PyBytes_FromFormatV(const char *format, va_list vargs)
169 {
170 char *s;
171 const char *f;
172 const char *p;
173 Py_ssize_t prec;
174 int longflag;
175 int size_tflag;
176 /* Longest 64-bit formatted numbers:
177 - "18446744073709551615\0" (21 bytes)
178 - "-9223372036854775808\0" (21 bytes)
179 Decimal takes the most space (it isn't enough for octal.)
180
181 Longest 64-bit pointer representation:
182 "0xffffffffffffffff\0" (19 bytes). */
183 char buffer[21];
184 _PyBytesWriter writer;
185
186 _PyBytesWriter_Init(&writer);
187
188 s = _PyBytesWriter_Alloc(&writer, strlen(format));
189 if (s == NULL)
190 return NULL;
191 writer.overallocate = 1;
192
193 #define WRITE_BYTES(str) \
194 do { \
195 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
196 if (s == NULL) \
197 goto error; \
198 } while (0)
199
200 for (f = format; *f; f++) {
201 if (*f != '%') {
202 *s++ = *f;
203 continue;
204 }
205
206 p = f++;
207
208 /* ignore the width (ex: 10 in "%10s") */
209 while (Py_ISDIGIT(*f))
210 f++;
211
212 /* parse the precision (ex: 10 in "%.10s") */
213 prec = 0;
214 if (*f == '.') {
215 f++;
216 for (; Py_ISDIGIT(*f); f++) {
217 prec = (prec * 10) + (*f - '0');
218 }
219 }
220
221 while (*f && *f != '%' && !Py_ISALPHA(*f))
222 f++;
223
224 /* handle the long flag ('l'), but only for %ld and %lu.
225 others can be added when necessary. */
226 longflag = 0;
227 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
228 longflag = 1;
229 ++f;
230 }
231
232 /* handle the size_t flag ('z'). */
233 size_tflag = 0;
234 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
235 size_tflag = 1;
236 ++f;
237 }
238
239 /* subtract bytes preallocated for the format string
240 (ex: 2 for "%s") */
241 writer.min_size -= (f - p + 1);
242
243 switch (*f) {
244 case 'c':
245 {
246 int c = va_arg(vargs, int);
247 if (c < 0 || c > 255) {
248 PyErr_SetString(PyExc_OverflowError,
249 "PyBytes_FromFormatV(): %c format "
250 "expects an integer in range [0; 255]");
251 goto error;
252 }
253 writer.min_size++;
254 *s++ = (unsigned char)c;
255 break;
256 }
257
258 case 'd':
259 if (longflag)
260 sprintf(buffer, "%ld", va_arg(vargs, long));
261 else if (size_tflag)
262 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
263 va_arg(vargs, Py_ssize_t));
264 else
265 sprintf(buffer, "%d", va_arg(vargs, int));
266 assert(strlen(buffer) < sizeof(buffer));
267 WRITE_BYTES(buffer);
268 break;
269
270 case 'u':
271 if (longflag)
272 sprintf(buffer, "%lu",
273 va_arg(vargs, unsigned long));
274 else if (size_tflag)
275 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
276 va_arg(vargs, size_t));
277 else
278 sprintf(buffer, "%u",
279 va_arg(vargs, unsigned int));
280 assert(strlen(buffer) < sizeof(buffer));
281 WRITE_BYTES(buffer);
282 break;
283
284 case 'i':
285 sprintf(buffer, "%i", va_arg(vargs, int));
286 assert(strlen(buffer) < sizeof(buffer));
287 WRITE_BYTES(buffer);
288 break;
289
290 case 'x':
291 sprintf(buffer, "%x", va_arg(vargs, int));
292 assert(strlen(buffer) < sizeof(buffer));
293 WRITE_BYTES(buffer);
294 break;
295
296 case 's':
297 {
298 Py_ssize_t i;
299
300 p = va_arg(vargs, const char*);
301 if (prec <= 0) {
302 i = strlen(p);
303 }
304 else {
305 i = 0;
306 while (i < prec && p[i]) {
307 i++;
308 }
309 }
310 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
311 if (s == NULL)
312 goto error;
313 break;
314 }
315
316 case 'p':
317 sprintf(buffer, "%p", va_arg(vargs, void*));
318 assert(strlen(buffer) < sizeof(buffer));
319 /* %p is ill-defined: ensure leading 0x. */
320 if (buffer[1] == 'X')
321 buffer[1] = 'x';
322 else if (buffer[1] != 'x') {
323 memmove(buffer+2, buffer, strlen(buffer)+1);
324 buffer[0] = '0';
325 buffer[1] = 'x';
326 }
327 WRITE_BYTES(buffer);
328 break;
329
330 case '%':
331 writer.min_size++;
332 *s++ = '%';
333 break;
334
335 default:
336 if (*f == 0) {
337 /* fix min_size if we reached the end of the format string */
338 writer.min_size++;
339 }
340
341 /* invalid format string: copy unformatted string and exit */
342 WRITE_BYTES(p);
343 return _PyBytesWriter_Finish(&writer, s);
344 }
345 }
346
347 #undef WRITE_BYTES
348
349 return _PyBytesWriter_Finish(&writer, s);
350
351 error:
352 _PyBytesWriter_Dealloc(&writer);
353 return NULL;
354 }
355
356 PyObject *
PyBytes_FromFormat(const char * format,...)357 PyBytes_FromFormat(const char *format, ...)
358 {
359 PyObject* ret;
360 va_list vargs;
361
362 #ifdef HAVE_STDARG_PROTOTYPES
363 va_start(vargs, format);
364 #else
365 va_start(vargs);
366 #endif
367 ret = PyBytes_FromFormatV(format, vargs);
368 va_end(vargs);
369 return ret;
370 }
371
372 /* Helpers for formatstring */
373
374 Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject * args,Py_ssize_t arglen,Py_ssize_t * p_argidx)375 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
376 {
377 Py_ssize_t argidx = *p_argidx;
378 if (argidx < arglen) {
379 (*p_argidx)++;
380 if (arglen < 0)
381 return args;
382 else
383 return PyTuple_GetItem(args, argidx);
384 }
385 PyErr_SetString(PyExc_TypeError,
386 "not enough arguments for format string");
387 return NULL;
388 }
389
390 /* Format codes
391 * F_LJUST '-'
392 * F_SIGN '+'
393 * F_BLANK ' '
394 * F_ALT '#'
395 * F_ZERO '0'
396 */
/* '-' flag: pad on the right instead of the left; also set when a '*' width
   argument is negative. */
397 #define F_LJUST (1<<0)
/* '+' flag: emit '+' in front of numbers that carry no sign of their own. */
398 #define F_SIGN (1<<1)
/* ' ' flag: emit a blank in front of numbers that carry no sign of their own
   (only consulted when F_SIGN is not set). */
399 #define F_BLANK (1<<2)
/* '#' flag: alternate form; forwarded to the number formatters and enables
   the separate prefix handling for 'o', 'x' and 'X'. */
400 #define F_ALT (1<<3)
/* '0' flag: numeric conversions are padded with '0' instead of ' '. */
401 #define F_ZERO (1<<4)
402
403 /* Returns a new reference to a PyBytes object, or NULL on failure. */
404
405 static char*
formatfloat(PyObject * v,int flags,int prec,int type,PyObject ** p_result,_PyBytesWriter * writer,char * str)406 formatfloat(PyObject *v, int flags, int prec, int type,
407 PyObject **p_result, _PyBytesWriter *writer, char *str)
408 {
409 char *p;
410 PyObject *result;
411 double x;
412 size_t len;
413
414 x = PyFloat_AsDouble(v);
415 if (x == -1.0 && PyErr_Occurred()) {
416 PyErr_Format(PyExc_TypeError, "float argument required, "
417 "not %.200s", Py_TYPE(v)->tp_name);
418 return NULL;
419 }
420
421 if (prec < 0)
422 prec = 6;
423
424 p = PyOS_double_to_string(x, type, prec,
425 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
426
427 if (p == NULL)
428 return NULL;
429
430 len = strlen(p);
431 if (writer != NULL) {
432 str = _PyBytesWriter_Prepare(writer, str, len);
433 if (str == NULL)
434 return NULL;
435 memcpy(str, p, len);
436 PyMem_Free(p);
437 str += len;
438 return str;
439 }
440
441 result = PyBytes_FromStringAndSize(p, len);
442 PyMem_Free(p);
443 *p_result = result;
444 return result != NULL ? str : NULL;
445 }
446
447 static PyObject *
formatlong(PyObject * v,int flags,int prec,int type)448 formatlong(PyObject *v, int flags, int prec, int type)
449 {
450 PyObject *result, *iobj;
451 if (type == 'i')
452 type = 'd';
453 if (PyLong_Check(v))
454 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
455 if (PyNumber_Check(v)) {
456 /* make sure number is a type of integer for o, x, and X */
457 if (type == 'o' || type == 'x' || type == 'X')
458 iobj = PyNumber_Index(v);
459 else
460 iobj = PyNumber_Long(v);
461 if (iobj == NULL) {
462 if (!PyErr_ExceptionMatches(PyExc_TypeError))
463 return NULL;
464 }
465 else if (!PyLong_Check(iobj))
466 Py_CLEAR(iobj);
467 if (iobj != NULL) {
468 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
469 Py_DECREF(iobj);
470 return result;
471 }
472 }
473 PyErr_Format(PyExc_TypeError,
474 "%%%c format: %s is required, not %.200s", type,
475 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
476 : "a number",
477 Py_TYPE(v)->tp_name);
478 return NULL;
479 }
480
481 static int
byte_converter(PyObject * arg,char * p)482 byte_converter(PyObject *arg, char *p)
483 {
484 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
485 *p = PyBytes_AS_STRING(arg)[0];
486 return 1;
487 }
488 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
489 *p = PyByteArray_AS_STRING(arg)[0];
490 return 1;
491 }
492 else {
493 PyObject *iobj;
494 long ival;
495 int overflow;
496 /* make sure number is a type of integer */
497 if (PyLong_Check(arg)) {
498 ival = PyLong_AsLongAndOverflow(arg, &overflow);
499 }
500 else {
501 iobj = PyNumber_Index(arg);
502 if (iobj == NULL) {
503 if (!PyErr_ExceptionMatches(PyExc_TypeError))
504 return 0;
505 goto onError;
506 }
507 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
508 Py_DECREF(iobj);
509 }
510 if (!overflow && ival == -1 && PyErr_Occurred())
511 goto onError;
512 if (overflow || !(0 <= ival && ival <= 255)) {
513 PyErr_SetString(PyExc_OverflowError,
514 "%c arg not in range(256)");
515 return 0;
516 }
517 *p = (char)ival;
518 return 1;
519 }
520 onError:
521 PyErr_SetString(PyExc_TypeError,
522 "%c requires an integer in range(256) or a single byte");
523 return 0;
524 }
525
526 static PyObject *_PyBytes_FromBuffer(PyObject *x);
527
528 static PyObject *
format_obj(PyObject * v,const char ** pbuf,Py_ssize_t * plen)529 format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
530 {
531 PyObject *func, *result;
532 /* is it a bytes object? */
533 if (PyBytes_Check(v)) {
534 *pbuf = PyBytes_AS_STRING(v);
535 *plen = PyBytes_GET_SIZE(v);
536 Py_INCREF(v);
537 return v;
538 }
539 if (PyByteArray_Check(v)) {
540 *pbuf = PyByteArray_AS_STRING(v);
541 *plen = PyByteArray_GET_SIZE(v);
542 Py_INCREF(v);
543 return v;
544 }
545 /* does it support __bytes__? */
546 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
547 if (func != NULL) {
548 result = _PyObject_CallNoArg(func);
549 Py_DECREF(func);
550 if (result == NULL)
551 return NULL;
552 if (!PyBytes_Check(result)) {
553 PyErr_Format(PyExc_TypeError,
554 "__bytes__ returned non-bytes (type %.200s)",
555 Py_TYPE(result)->tp_name);
556 Py_DECREF(result);
557 return NULL;
558 }
559 *pbuf = PyBytes_AS_STRING(result);
560 *plen = PyBytes_GET_SIZE(result);
561 return result;
562 }
563 /* does it support buffer protocol? */
564 if (PyObject_CheckBuffer(v)) {
565 /* maybe we can avoid making a copy of the buffer object here? */
566 result = _PyBytes_FromBuffer(v);
567 if (result == NULL)
568 return NULL;
569 *pbuf = PyBytes_AS_STRING(result);
570 *plen = PyBytes_GET_SIZE(result);
571 return result;
572 }
573 PyErr_Format(PyExc_TypeError,
574 "%%b requires a bytes-like object, "
575 "or an object that implements __bytes__, not '%.100s'",
576 Py_TYPE(v)->tp_name);
577 return NULL;
578 }
579
580 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
581
582 PyObject *
_PyBytes_FormatEx(const char * format,Py_ssize_t format_len,PyObject * args,int use_bytearray)583 _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
584 PyObject *args, int use_bytearray)
585 {
586 const char *fmt;
587 char *res;
588 Py_ssize_t arglen, argidx;
589 Py_ssize_t fmtcnt;
590 int args_owned = 0;
591 PyObject *dict = NULL;
592 _PyBytesWriter writer;
593
594 if (args == NULL) {
595 PyErr_BadInternalCall();
596 return NULL;
597 }
598 fmt = format;
599 fmtcnt = format_len;
600
601 _PyBytesWriter_Init(&writer);
602 writer.use_bytearray = use_bytearray;
603
604 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
605 if (res == NULL)
606 return NULL;
607 if (!use_bytearray)
608 writer.overallocate = 1;
609
610 if (PyTuple_Check(args)) {
611 arglen = PyTuple_GET_SIZE(args);
612 argidx = 0;
613 }
614 else {
615 arglen = -1;
616 argidx = -2;
617 }
618 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
619 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
620 !PyByteArray_Check(args)) {
621 dict = args;
622 }
623
624 while (--fmtcnt >= 0) {
625 if (*fmt != '%') {
626 Py_ssize_t len;
627 char *pos;
628
629 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
630 if (pos != NULL)
631 len = pos - fmt;
632 else
633 len = fmtcnt + 1;
634 assert(len != 0);
635
636 memcpy(res, fmt, len);
637 res += len;
638 fmt += len;
639 fmtcnt -= (len - 1);
640 }
641 else {
642 /* Got a format specifier */
643 int flags = 0;
644 Py_ssize_t width = -1;
645 int prec = -1;
646 int c = '\0';
647 int fill;
648 PyObject *v = NULL;
649 PyObject *temp = NULL;
650 const char *pbuf = NULL;
651 int sign;
652 Py_ssize_t len = 0;
653 char onechar; /* For byte_converter() */
654 Py_ssize_t alloc;
655
656 fmt++;
657 if (*fmt == '%') {
658 *res++ = '%';
659 fmt++;
660 fmtcnt--;
661 continue;
662 }
663 if (*fmt == '(') {
664 const char *keystart;
665 Py_ssize_t keylen;
666 PyObject *key;
667 int pcount = 1;
668
669 if (dict == NULL) {
670 PyErr_SetString(PyExc_TypeError,
671 "format requires a mapping");
672 goto error;
673 }
674 ++fmt;
675 --fmtcnt;
676 keystart = fmt;
677 /* Skip over balanced parentheses */
678 while (pcount > 0 && --fmtcnt >= 0) {
679 if (*fmt == ')')
680 --pcount;
681 else if (*fmt == '(')
682 ++pcount;
683 fmt++;
684 }
685 keylen = fmt - keystart - 1;
686 if (fmtcnt < 0 || pcount > 0) {
687 PyErr_SetString(PyExc_ValueError,
688 "incomplete format key");
689 goto error;
690 }
691 key = PyBytes_FromStringAndSize(keystart,
692 keylen);
693 if (key == NULL)
694 goto error;
695 if (args_owned) {
696 Py_DECREF(args);
697 args_owned = 0;
698 }
699 args = PyObject_GetItem(dict, key);
700 Py_DECREF(key);
701 if (args == NULL) {
702 goto error;
703 }
704 args_owned = 1;
705 arglen = -1;
706 argidx = -2;
707 }
708
709 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
710 while (--fmtcnt >= 0) {
711 switch (c = *fmt++) {
712 case '-': flags |= F_LJUST; continue;
713 case '+': flags |= F_SIGN; continue;
714 case ' ': flags |= F_BLANK; continue;
715 case '#': flags |= F_ALT; continue;
716 case '0': flags |= F_ZERO; continue;
717 }
718 break;
719 }
720
721 /* Parse width. Example: "%10s" => width=10 */
722 if (c == '*') {
723 v = getnextarg(args, arglen, &argidx);
724 if (v == NULL)
725 goto error;
726 if (!PyLong_Check(v)) {
727 PyErr_SetString(PyExc_TypeError,
728 "* wants int");
729 goto error;
730 }
731 width = PyLong_AsSsize_t(v);
732 if (width == -1 && PyErr_Occurred())
733 goto error;
734 if (width < 0) {
735 flags |= F_LJUST;
736 width = -width;
737 }
738 if (--fmtcnt >= 0)
739 c = *fmt++;
740 }
741 else if (c >= 0 && isdigit(c)) {
742 width = c - '0';
743 while (--fmtcnt >= 0) {
744 c = Py_CHARMASK(*fmt++);
745 if (!isdigit(c))
746 break;
747 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
748 PyErr_SetString(
749 PyExc_ValueError,
750 "width too big");
751 goto error;
752 }
753 width = width*10 + (c - '0');
754 }
755 }
756
757 /* Parse precision. Example: "%.3f" => prec=3 */
758 if (c == '.') {
759 prec = 0;
760 if (--fmtcnt >= 0)
761 c = *fmt++;
762 if (c == '*') {
763 v = getnextarg(args, arglen, &argidx);
764 if (v == NULL)
765 goto error;
766 if (!PyLong_Check(v)) {
767 PyErr_SetString(
768 PyExc_TypeError,
769 "* wants int");
770 goto error;
771 }
772 prec = _PyLong_AsInt(v);
773 if (prec == -1 && PyErr_Occurred())
774 goto error;
775 if (prec < 0)
776 prec = 0;
777 if (--fmtcnt >= 0)
778 c = *fmt++;
779 }
780 else if (c >= 0 && isdigit(c)) {
781 prec = c - '0';
782 while (--fmtcnt >= 0) {
783 c = Py_CHARMASK(*fmt++);
784 if (!isdigit(c))
785 break;
786 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
787 PyErr_SetString(
788 PyExc_ValueError,
789 "prec too big");
790 goto error;
791 }
792 prec = prec*10 + (c - '0');
793 }
794 }
795 } /* prec */
796 if (fmtcnt >= 0) {
797 if (c == 'h' || c == 'l' || c == 'L') {
798 if (--fmtcnt >= 0)
799 c = *fmt++;
800 }
801 }
802 if (fmtcnt < 0) {
803 PyErr_SetString(PyExc_ValueError,
804 "incomplete format");
805 goto error;
806 }
807 v = getnextarg(args, arglen, &argidx);
808 if (v == NULL)
809 goto error;
810
811 if (fmtcnt == 0) {
812 /* last write: disable writer overallocation */
813 writer.overallocate = 0;
814 }
815
816 sign = 0;
817 fill = ' ';
818 switch (c) {
819 case 'r':
820 // %r is only for 2/3 code; 3 only code should use %a
821 case 'a':
822 temp = PyObject_ASCII(v);
823 if (temp == NULL)
824 goto error;
825 assert(PyUnicode_IS_ASCII(temp));
826 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
827 len = PyUnicode_GET_LENGTH(temp);
828 if (prec >= 0 && len > prec)
829 len = prec;
830 break;
831
832 case 's':
833 // %s is only for 2/3 code; 3 only code should use %b
834 case 'b':
835 temp = format_obj(v, &pbuf, &len);
836 if (temp == NULL)
837 goto error;
838 if (prec >= 0 && len > prec)
839 len = prec;
840 break;
841
842 case 'i':
843 case 'd':
844 case 'u':
845 case 'o':
846 case 'x':
847 case 'X':
848 if (PyLong_CheckExact(v)
849 && width == -1 && prec == -1
850 && !(flags & (F_SIGN | F_BLANK))
851 && c != 'X')
852 {
853 /* Fast path */
854 int alternate = flags & F_ALT;
855 int base;
856
857 switch(c)
858 {
859 default:
860 Py_UNREACHABLE();
861 case 'd':
862 case 'i':
863 case 'u':
864 base = 10;
865 break;
866 case 'o':
867 base = 8;
868 break;
869 case 'x':
870 case 'X':
871 base = 16;
872 break;
873 }
874
875 /* Fast path */
876 writer.min_size -= 2; /* size preallocated for "%d" */
877 res = _PyLong_FormatBytesWriter(&writer, res,
878 v, base, alternate);
879 if (res == NULL)
880 goto error;
881 continue;
882 }
883
884 temp = formatlong(v, flags, prec, c);
885 if (!temp)
886 goto error;
887 assert(PyUnicode_IS_ASCII(temp));
888 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
889 len = PyUnicode_GET_LENGTH(temp);
890 sign = 1;
891 if (flags & F_ZERO)
892 fill = '0';
893 break;
894
895 case 'e':
896 case 'E':
897 case 'f':
898 case 'F':
899 case 'g':
900 case 'G':
901 if (width == -1 && prec == -1
902 && !(flags & (F_SIGN | F_BLANK)))
903 {
904 /* Fast path */
905 writer.min_size -= 2; /* size preallocated for "%f" */
906 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
907 if (res == NULL)
908 goto error;
909 continue;
910 }
911
912 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
913 goto error;
914 pbuf = PyBytes_AS_STRING(temp);
915 len = PyBytes_GET_SIZE(temp);
916 sign = 1;
917 if (flags & F_ZERO)
918 fill = '0';
919 break;
920
921 case 'c':
922 pbuf = &onechar;
923 len = byte_converter(v, &onechar);
924 if (!len)
925 goto error;
926 if (width == -1) {
927 /* Fast path */
928 *res++ = onechar;
929 continue;
930 }
931 break;
932
933 default:
934 PyErr_Format(PyExc_ValueError,
935 "unsupported format character '%c' (0x%x) "
936 "at index %zd",
937 c, c,
938 (Py_ssize_t)(fmt - 1 - format));
939 goto error;
940 }
941
942 if (sign) {
943 if (*pbuf == '-' || *pbuf == '+') {
944 sign = *pbuf++;
945 len--;
946 }
947 else if (flags & F_SIGN)
948 sign = '+';
949 else if (flags & F_BLANK)
950 sign = ' ';
951 else
952 sign = 0;
953 }
954 if (width < len)
955 width = len;
956
957 alloc = width;
958 if (sign != 0 && len == width)
959 alloc++;
960 /* 2: size preallocated for %s */
961 if (alloc > 2) {
962 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
963 if (res == NULL)
964 goto error;
965 }
966 #ifndef NDEBUG
967 char *before = res;
968 #endif
969
970 /* Write the sign if needed */
971 if (sign) {
972 if (fill != ' ')
973 *res++ = sign;
974 if (width > len)
975 width--;
976 }
977
978 /* Write the numeric prefix for "x", "X" and "o" formats
979 if the alternate form is used.
980 For example, write "0x" for the "%#x" format. */
981 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
982 assert(pbuf[0] == '0');
983 assert(pbuf[1] == c);
984 if (fill != ' ') {
985 *res++ = *pbuf++;
986 *res++ = *pbuf++;
987 }
988 width -= 2;
989 if (width < 0)
990 width = 0;
991 len -= 2;
992 }
993
994 /* Pad left with the fill character if needed */
995 if (width > len && !(flags & F_LJUST)) {
996 memset(res, fill, width - len);
997 res += (width - len);
998 width = len;
999 }
1000
1001 /* If padding with spaces: write sign if needed and/or numeric
1002 prefix if the alternate form is used */
1003 if (fill == ' ') {
1004 if (sign)
1005 *res++ = sign;
1006 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1007 assert(pbuf[0] == '0');
1008 assert(pbuf[1] == c);
1009 *res++ = *pbuf++;
1010 *res++ = *pbuf++;
1011 }
1012 }
1013
1014 /* Copy bytes */
1015 memcpy(res, pbuf, len);
1016 res += len;
1017
1018 /* Pad right with the fill character if needed */
1019 if (width > len) {
1020 memset(res, ' ', width - len);
1021 res += (width - len);
1022 }
1023
1024 if (dict && (argidx < arglen)) {
1025 PyErr_SetString(PyExc_TypeError,
1026 "not all arguments converted during bytes formatting");
1027 Py_XDECREF(temp);
1028 goto error;
1029 }
1030 Py_XDECREF(temp);
1031
1032 #ifndef NDEBUG
1033 /* check that we computed the exact size for this write */
1034 assert((res - before) == alloc);
1035 #endif
1036 } /* '%' */
1037
1038 /* If overallocation was disabled, ensure that it was the last
1039 write. Otherwise, we missed an optimization */
1040 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
1041 } /* until end */
1042
1043 if (argidx < arglen && !dict) {
1044 PyErr_SetString(PyExc_TypeError,
1045 "not all arguments converted during bytes formatting");
1046 goto error;
1047 }
1048
1049 if (args_owned) {
1050 Py_DECREF(args);
1051 }
1052 return _PyBytesWriter_Finish(&writer, res);
1053
1054 error:
1055 _PyBytesWriter_Dealloc(&writer);
1056 if (args_owned) {
1057 Py_DECREF(args);
1058 }
1059 return NULL;
1060 }
1061
1062 /* Unescape a backslash-escaped string. */
_PyBytes_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,const char ** first_invalid_escape)1063 PyObject *_PyBytes_DecodeEscape(const char *s,
1064 Py_ssize_t len,
1065 const char *errors,
1066 const char **first_invalid_escape)
1067 {
1068 int c;
1069 char *p;
1070 const char *end;
1071 _PyBytesWriter writer;
1072
1073 _PyBytesWriter_Init(&writer);
1074
1075 p = _PyBytesWriter_Alloc(&writer, len);
1076 if (p == NULL)
1077 return NULL;
1078 writer.overallocate = 1;
1079
1080 *first_invalid_escape = NULL;
1081
1082 end = s + len;
1083 while (s < end) {
1084 if (*s != '\\') {
1085 *p++ = *s++;
1086 continue;
1087 }
1088
1089 s++;
1090 if (s == end) {
1091 PyErr_SetString(PyExc_ValueError,
1092 "Trailing \\ in string");
1093 goto failed;
1094 }
1095
1096 switch (*s++) {
1097 /* XXX This assumes ASCII! */
1098 case '\n': break;
1099 case '\\': *p++ = '\\'; break;
1100 case '\'': *p++ = '\''; break;
1101 case '\"': *p++ = '\"'; break;
1102 case 'b': *p++ = '\b'; break;
1103 case 'f': *p++ = '\014'; break; /* FF */
1104 case 't': *p++ = '\t'; break;
1105 case 'n': *p++ = '\n'; break;
1106 case 'r': *p++ = '\r'; break;
1107 case 'v': *p++ = '\013'; break; /* VT */
1108 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1109 case '0': case '1': case '2': case '3':
1110 case '4': case '5': case '6': case '7':
1111 c = s[-1] - '0';
1112 if (s < end && '0' <= *s && *s <= '7') {
1113 c = (c<<3) + *s++ - '0';
1114 if (s < end && '0' <= *s && *s <= '7')
1115 c = (c<<3) + *s++ - '0';
1116 }
1117 *p++ = c;
1118 break;
1119 case 'x':
1120 if (s+1 < end) {
1121 int digit1, digit2;
1122 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1123 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1124 if (digit1 < 16 && digit2 < 16) {
1125 *p++ = (unsigned char)((digit1 << 4) + digit2);
1126 s += 2;
1127 break;
1128 }
1129 }
1130 /* invalid hexadecimal digits */
1131
1132 if (!errors || strcmp(errors, "strict") == 0) {
1133 PyErr_Format(PyExc_ValueError,
1134 "invalid \\x escape at position %zd",
1135 s - 2 - (end - len));
1136 goto failed;
1137 }
1138 if (strcmp(errors, "replace") == 0) {
1139 *p++ = '?';
1140 } else if (strcmp(errors, "ignore") == 0)
1141 /* do nothing */;
1142 else {
1143 PyErr_Format(PyExc_ValueError,
1144 "decoding error; unknown "
1145 "error handling code: %.400s",
1146 errors);
1147 goto failed;
1148 }
1149 /* skip \x */
1150 if (s < end && Py_ISXDIGIT(s[0]))
1151 s++; /* and a hexdigit */
1152 break;
1153
1154 default:
1155 if (*first_invalid_escape == NULL) {
1156 *first_invalid_escape = s-1; /* Back up one char, since we've
1157 already incremented s. */
1158 }
1159 *p++ = '\\';
1160 s--;
1161 }
1162 }
1163
1164 return _PyBytesWriter_Finish(&writer, p);
1165
1166 failed:
1167 _PyBytesWriter_Dealloc(&writer);
1168 return NULL;
1169 }
1170
PyBytes_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,Py_ssize_t Py_UNUSED (unicode),const char * Py_UNUSED (recode_encoding))1171 PyObject *PyBytes_DecodeEscape(const char *s,
1172 Py_ssize_t len,
1173 const char *errors,
1174 Py_ssize_t Py_UNUSED(unicode),
1175 const char *Py_UNUSED(recode_encoding))
1176 {
1177 const char* first_invalid_escape;
1178 PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
1179 &first_invalid_escape);
1180 if (result == NULL)
1181 return NULL;
1182 if (first_invalid_escape != NULL) {
1183 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1184 "invalid escape sequence '\\%c'",
1185 (unsigned char)*first_invalid_escape) < 0) {
1186 Py_DECREF(result);
1187 return NULL;
1188 }
1189 }
1190 return result;
1191
1192 }
1193 /* -------------------------------------------------------------------- */
1194 /* object api */
1195
1196 Py_ssize_t
PyBytes_Size(PyObject * op)1197 PyBytes_Size(PyObject *op)
1198 {
1199 if (!PyBytes_Check(op)) {
1200 PyErr_Format(PyExc_TypeError,
1201 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1202 return -1;
1203 }
1204 return Py_SIZE(op);
1205 }
1206
1207 char *
PyBytes_AsString(PyObject * op)1208 PyBytes_AsString(PyObject *op)
1209 {
1210 if (!PyBytes_Check(op)) {
1211 PyErr_Format(PyExc_TypeError,
1212 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1213 return NULL;
1214 }
1215 return ((PyBytesObject *)op)->ob_sval;
1216 }
1217
1218 int
PyBytes_AsStringAndSize(PyObject * obj,char ** s,Py_ssize_t * len)1219 PyBytes_AsStringAndSize(PyObject *obj,
1220 char **s,
1221 Py_ssize_t *len)
1222 {
1223 if (s == NULL) {
1224 PyErr_BadInternalCall();
1225 return -1;
1226 }
1227
1228 if (!PyBytes_Check(obj)) {
1229 PyErr_Format(PyExc_TypeError,
1230 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1231 return -1;
1232 }
1233
1234 *s = PyBytes_AS_STRING(obj);
1235 if (len != NULL)
1236 *len = PyBytes_GET_SIZE(obj);
1237 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1238 PyErr_SetString(PyExc_ValueError,
1239 "embedded null byte");
1240 return -1;
1241 }
1242 return 0;
1243 }
1244
1245 /* -------------------------------------------------------------------- */
1246 /* Methods */
1247
1248 #include "stringlib/stringdefs.h"
1249
1250 #include "stringlib/fastsearch.h"
1251 #include "stringlib/count.h"
1252 #include "stringlib/find.h"
1253 #include "stringlib/join.h"
1254 #include "stringlib/partition.h"
1255 #include "stringlib/split.h"
1256 #include "stringlib/ctype.h"
1257
1258 #include "stringlib/transmogrify.h"
1259
1260 PyObject *
PyBytes_Repr(PyObject * obj,int smartquotes)1261 PyBytes_Repr(PyObject *obj, int smartquotes)
1262 {
1263 PyBytesObject* op = (PyBytesObject*) obj;
1264 Py_ssize_t i, length = Py_SIZE(op);
1265 Py_ssize_t newsize, squotes, dquotes;
1266 PyObject *v;
1267 unsigned char quote;
1268 const unsigned char *s;
1269 Py_UCS1 *p;
1270
1271 /* Compute size of output string */
1272 squotes = dquotes = 0;
1273 newsize = 3; /* b'' */
1274 s = (const unsigned char*)op->ob_sval;
1275 for (i = 0; i < length; i++) {
1276 Py_ssize_t incr = 1;
1277 switch(s[i]) {
1278 case '\'': squotes++; break;
1279 case '"': dquotes++; break;
1280 case '\\': case '\t': case '\n': case '\r':
1281 incr = 2; break; /* \C */
1282 default:
1283 if (s[i] < ' ' || s[i] >= 0x7f)
1284 incr = 4; /* \xHH */
1285 }
1286 if (newsize > PY_SSIZE_T_MAX - incr)
1287 goto overflow;
1288 newsize += incr;
1289 }
1290 quote = '\'';
1291 if (smartquotes && squotes && !dquotes)
1292 quote = '"';
1293 if (squotes && quote == '\'') {
1294 if (newsize > PY_SSIZE_T_MAX - squotes)
1295 goto overflow;
1296 newsize += squotes;
1297 }
1298
1299 v = PyUnicode_New(newsize, 127);
1300 if (v == NULL) {
1301 return NULL;
1302 }
1303 p = PyUnicode_1BYTE_DATA(v);
1304
1305 *p++ = 'b', *p++ = quote;
1306 for (i = 0; i < length; i++) {
1307 unsigned char c = op->ob_sval[i];
1308 if (c == quote || c == '\\')
1309 *p++ = '\\', *p++ = c;
1310 else if (c == '\t')
1311 *p++ = '\\', *p++ = 't';
1312 else if (c == '\n')
1313 *p++ = '\\', *p++ = 'n';
1314 else if (c == '\r')
1315 *p++ = '\\', *p++ = 'r';
1316 else if (c < ' ' || c >= 0x7f) {
1317 *p++ = '\\';
1318 *p++ = 'x';
1319 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1320 *p++ = Py_hexdigits[c & 0xf];
1321 }
1322 else
1323 *p++ = c;
1324 }
1325 *p++ = quote;
1326 assert(_PyUnicode_CheckConsistency(v, 1));
1327 return v;
1328
1329 overflow:
1330 PyErr_SetString(PyExc_OverflowError,
1331 "bytes object is too large to make repr");
1332 return NULL;
1333 }
1334
1335 static PyObject *
bytes_repr(PyObject * op)1336 bytes_repr(PyObject *op)
1337 {
1338 return PyBytes_Repr(op, 1);
1339 }
1340
1341 static PyObject *
bytes_str(PyObject * op)1342 bytes_str(PyObject *op)
1343 {
1344 if (_Py_GetConfig()->bytes_warning) {
1345 if (PyErr_WarnEx(PyExc_BytesWarning,
1346 "str() on a bytes instance", 1)) {
1347 return NULL;
1348 }
1349 }
1350 return bytes_repr(op);
1351 }
1352
1353 static Py_ssize_t
bytes_length(PyBytesObject * a)1354 bytes_length(PyBytesObject *a)
1355 {
1356 return Py_SIZE(a);
1357 }
1358
1359 /* This is also used by PyBytes_Concat() */
1360 static PyObject *
bytes_concat(PyObject * a,PyObject * b)1361 bytes_concat(PyObject *a, PyObject *b)
1362 {
1363 Py_buffer va, vb;
1364 PyObject *result = NULL;
1365
1366 va.len = -1;
1367 vb.len = -1;
1368 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1369 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1370 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1371 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
1372 goto done;
1373 }
1374
1375 /* Optimize end cases */
1376 if (va.len == 0 && PyBytes_CheckExact(b)) {
1377 result = b;
1378 Py_INCREF(result);
1379 goto done;
1380 }
1381 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1382 result = a;
1383 Py_INCREF(result);
1384 goto done;
1385 }
1386
1387 if (va.len > PY_SSIZE_T_MAX - vb.len) {
1388 PyErr_NoMemory();
1389 goto done;
1390 }
1391
1392 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1393 if (result != NULL) {
1394 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1395 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1396 }
1397
1398 done:
1399 if (va.len != -1)
1400 PyBuffer_Release(&va);
1401 if (vb.len != -1)
1402 PyBuffer_Release(&vb);
1403 return result;
1404 }
1405
1406 static PyObject *
bytes_repeat(PyBytesObject * a,Py_ssize_t n)1407 bytes_repeat(PyBytesObject *a, Py_ssize_t n)
1408 {
1409 Py_ssize_t i;
1410 Py_ssize_t j;
1411 Py_ssize_t size;
1412 PyBytesObject *op;
1413 size_t nbytes;
1414 if (n < 0)
1415 n = 0;
1416 /* watch out for overflows: the size can overflow int,
1417 * and the # of bytes needed can overflow size_t
1418 */
1419 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1420 PyErr_SetString(PyExc_OverflowError,
1421 "repeated bytes are too long");
1422 return NULL;
1423 }
1424 size = Py_SIZE(a) * n;
1425 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1426 Py_INCREF(a);
1427 return (PyObject *)a;
1428 }
1429 nbytes = (size_t)size;
1430 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1431 PyErr_SetString(PyExc_OverflowError,
1432 "repeated bytes are too long");
1433 return NULL;
1434 }
1435 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1436 if (op == NULL)
1437 return PyErr_NoMemory();
1438 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
1439 op->ob_shash = -1;
1440 op->ob_sval[size] = '\0';
1441 if (Py_SIZE(a) == 1 && n > 0) {
1442 memset(op->ob_sval, a->ob_sval[0] , n);
1443 return (PyObject *) op;
1444 }
1445 i = 0;
1446 if (i < size) {
1447 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
1448 i = Py_SIZE(a);
1449 }
1450 while (i < size) {
1451 j = (i <= size-i) ? i : size-i;
1452 memcpy(op->ob_sval+i, op->ob_sval, j);
1453 i += j;
1454 }
1455 return (PyObject *) op;
1456 }
1457
1458 static int
bytes_contains(PyObject * self,PyObject * arg)1459 bytes_contains(PyObject *self, PyObject *arg)
1460 {
1461 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1462 }
1463
1464 static PyObject *
bytes_item(PyBytesObject * a,Py_ssize_t i)1465 bytes_item(PyBytesObject *a, Py_ssize_t i)
1466 {
1467 if (i < 0 || i >= Py_SIZE(a)) {
1468 PyErr_SetString(PyExc_IndexError, "index out of range");
1469 return NULL;
1470 }
1471 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
1472 }
1473
1474 static int
bytes_compare_eq(PyBytesObject * a,PyBytesObject * b)1475 bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1476 {
1477 int cmp;
1478 Py_ssize_t len;
1479
1480 len = Py_SIZE(a);
1481 if (Py_SIZE(b) != len)
1482 return 0;
1483
1484 if (a->ob_sval[0] != b->ob_sval[0])
1485 return 0;
1486
1487 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1488 return (cmp == 0);
1489 }
1490
1491 static PyObject*
bytes_richcompare(PyBytesObject * a,PyBytesObject * b,int op)1492 bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
1493 {
1494 int c;
1495 Py_ssize_t len_a, len_b;
1496 Py_ssize_t min_len;
1497 int rc;
1498
1499 /* Make sure both arguments are strings. */
1500 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1501 if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
1502 rc = PyObject_IsInstance((PyObject*)a,
1503 (PyObject*)&PyUnicode_Type);
1504 if (!rc)
1505 rc = PyObject_IsInstance((PyObject*)b,
1506 (PyObject*)&PyUnicode_Type);
1507 if (rc < 0)
1508 return NULL;
1509 if (rc) {
1510 if (PyErr_WarnEx(PyExc_BytesWarning,
1511 "Comparison between bytes and string", 1))
1512 return NULL;
1513 }
1514 else {
1515 rc = PyObject_IsInstance((PyObject*)a,
1516 (PyObject*)&PyLong_Type);
1517 if (!rc)
1518 rc = PyObject_IsInstance((PyObject*)b,
1519 (PyObject*)&PyLong_Type);
1520 if (rc < 0)
1521 return NULL;
1522 if (rc) {
1523 if (PyErr_WarnEx(PyExc_BytesWarning,
1524 "Comparison between bytes and int", 1))
1525 return NULL;
1526 }
1527 }
1528 }
1529 Py_RETURN_NOTIMPLEMENTED;
1530 }
1531 else if (a == b) {
1532 switch (op) {
1533 case Py_EQ:
1534 case Py_LE:
1535 case Py_GE:
1536 /* a string is equal to itself */
1537 Py_RETURN_TRUE;
1538 case Py_NE:
1539 case Py_LT:
1540 case Py_GT:
1541 Py_RETURN_FALSE;
1542 default:
1543 PyErr_BadArgument();
1544 return NULL;
1545 }
1546 }
1547 else if (op == Py_EQ || op == Py_NE) {
1548 int eq = bytes_compare_eq(a, b);
1549 eq ^= (op == Py_NE);
1550 return PyBool_FromLong(eq);
1551 }
1552 else {
1553 len_a = Py_SIZE(a);
1554 len_b = Py_SIZE(b);
1555 min_len = Py_MIN(len_a, len_b);
1556 if (min_len > 0) {
1557 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1558 if (c == 0)
1559 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1560 }
1561 else
1562 c = 0;
1563 if (c != 0)
1564 Py_RETURN_RICHCOMPARE(c, 0, op);
1565 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
1566 }
1567 }
1568
1569 static Py_hash_t
bytes_hash(PyBytesObject * a)1570 bytes_hash(PyBytesObject *a)
1571 {
1572 if (a->ob_shash == -1) {
1573 /* Can't fail */
1574 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
1575 }
1576 return a->ob_shash;
1577 }
1578
1579 static PyObject*
bytes_subscript(PyBytesObject * self,PyObject * item)1580 bytes_subscript(PyBytesObject* self, PyObject* item)
1581 {
1582 if (_PyIndex_Check(item)) {
1583 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1584 if (i == -1 && PyErr_Occurred())
1585 return NULL;
1586 if (i < 0)
1587 i += PyBytes_GET_SIZE(self);
1588 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1589 PyErr_SetString(PyExc_IndexError,
1590 "index out of range");
1591 return NULL;
1592 }
1593 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1594 }
1595 else if (PySlice_Check(item)) {
1596 Py_ssize_t start, stop, step, slicelength, i;
1597 size_t cur;
1598 const char* source_buf;
1599 char* result_buf;
1600 PyObject* result;
1601
1602 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1603 return NULL;
1604 }
1605 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1606 &stop, step);
1607
1608 if (slicelength <= 0) {
1609 return PyBytes_FromStringAndSize("", 0);
1610 }
1611 else if (start == 0 && step == 1 &&
1612 slicelength == PyBytes_GET_SIZE(self) &&
1613 PyBytes_CheckExact(self)) {
1614 Py_INCREF(self);
1615 return (PyObject *)self;
1616 }
1617 else if (step == 1) {
1618 return PyBytes_FromStringAndSize(
1619 PyBytes_AS_STRING(self) + start,
1620 slicelength);
1621 }
1622 else {
1623 source_buf = PyBytes_AS_STRING(self);
1624 result = PyBytes_FromStringAndSize(NULL, slicelength);
1625 if (result == NULL)
1626 return NULL;
1627
1628 result_buf = PyBytes_AS_STRING(result);
1629 for (cur = start, i = 0; i < slicelength;
1630 cur += step, i++) {
1631 result_buf[i] = source_buf[cur];
1632 }
1633
1634 return result;
1635 }
1636 }
1637 else {
1638 PyErr_Format(PyExc_TypeError,
1639 "byte indices must be integers or slices, not %.200s",
1640 Py_TYPE(item)->tp_name);
1641 return NULL;
1642 }
1643 }
1644
1645 static int
bytes_buffer_getbuffer(PyBytesObject * self,Py_buffer * view,int flags)1646 bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
1647 {
1648 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1649 1, flags);
1650 }
1651
1652 static PySequenceMethods bytes_as_sequence = {
1653 (lenfunc)bytes_length, /*sq_length*/
1654 (binaryfunc)bytes_concat, /*sq_concat*/
1655 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1656 (ssizeargfunc)bytes_item, /*sq_item*/
1657 0, /*sq_slice*/
1658 0, /*sq_ass_item*/
1659 0, /*sq_ass_slice*/
1660 (objobjproc)bytes_contains /*sq_contains*/
1661 };
1662
1663 static PyMappingMethods bytes_as_mapping = {
1664 (lenfunc)bytes_length,
1665 (binaryfunc)bytes_subscript,
1666 0,
1667 };
1668
1669 static PyBufferProcs bytes_as_buffer = {
1670 (getbufferproc)bytes_buffer_getbuffer,
1671 NULL,
1672 };
1673
1674
/* Strip modes passed as `striptype` to do_xstrip(), do_strip() and
   do_argstrip(): the leading end is stripped unless the mode is RIGHTSTRIP,
   and the trailing end is stripped unless the mode is LEFTSTRIP. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1678
1679 /*[clinic input]
1680 bytes.split
1681
1682 sep: object = None
1683 The delimiter according which to split the bytes.
1684 None (the default value) means split on ASCII whitespace characters
1685 (space, tab, return, newline, formfeed, vertical tab).
1686 maxsplit: Py_ssize_t = -1
1687 Maximum number of splits to do.
1688 -1 (the default value) means no limit.
1689
1690 Return a list of the sections in the bytes, using sep as the delimiter.
1691 [clinic start generated code]*/
1692
1693 static PyObject *
bytes_split_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1694 bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1695 /*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1696 {
1697 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1698 const char *s = PyBytes_AS_STRING(self), *sub;
1699 Py_buffer vsub;
1700 PyObject *list;
1701
1702 if (maxsplit < 0)
1703 maxsplit = PY_SSIZE_T_MAX;
1704 if (sep == Py_None)
1705 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1706 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1707 return NULL;
1708 sub = vsub.buf;
1709 n = vsub.len;
1710
1711 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1712 PyBuffer_Release(&vsub);
1713 return list;
1714 }
1715
1716 /*[clinic input]
1717 bytes.partition
1718
1719 sep: Py_buffer
1720 /
1721
1722 Partition the bytes into three parts using the given separator.
1723
1724 This will search for the separator sep in the bytes. If the separator is found,
1725 returns a 3-tuple containing the part before the separator, the separator
1726 itself, and the part after it.
1727
1728 If the separator is not found, returns a 3-tuple containing the original bytes
1729 object and two empty bytes objects.
1730 [clinic start generated code]*/
1731
1732 static PyObject *
bytes_partition_impl(PyBytesObject * self,Py_buffer * sep)1733 bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1734 /*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
1735 {
1736 return stringlib_partition(
1737 (PyObject*) self,
1738 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1739 sep->obj, (const char *)sep->buf, sep->len
1740 );
1741 }
1742
1743 /*[clinic input]
1744 bytes.rpartition
1745
1746 sep: Py_buffer
1747 /
1748
1749 Partition the bytes into three parts using the given separator.
1750
1751 This will search for the separator sep in the bytes, starting at the end. If
1752 the separator is found, returns a 3-tuple containing the part before the
1753 separator, the separator itself, and the part after it.
1754
1755 If the separator is not found, returns a 3-tuple containing two empty bytes
1756 objects and the original bytes object.
1757 [clinic start generated code]*/
1758
1759 static PyObject *
bytes_rpartition_impl(PyBytesObject * self,Py_buffer * sep)1760 bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1761 /*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
1762 {
1763 return stringlib_rpartition(
1764 (PyObject*) self,
1765 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1766 sep->obj, (const char *)sep->buf, sep->len
1767 );
1768 }
1769
1770 /*[clinic input]
1771 bytes.rsplit = bytes.split
1772
1773 Return a list of the sections in the bytes, using sep as the delimiter.
1774
1775 Splitting is done starting at the end of the bytes and working to the front.
1776 [clinic start generated code]*/
1777
1778 static PyObject *
bytes_rsplit_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1779 bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1780 /*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
1781 {
1782 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1783 const char *s = PyBytes_AS_STRING(self), *sub;
1784 Py_buffer vsub;
1785 PyObject *list;
1786
1787 if (maxsplit < 0)
1788 maxsplit = PY_SSIZE_T_MAX;
1789 if (sep == Py_None)
1790 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1791 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1792 return NULL;
1793 sub = vsub.buf;
1794 n = vsub.len;
1795
1796 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1797 PyBuffer_Release(&vsub);
1798 return list;
1799 }
1800
1801
1802 /*[clinic input]
1803 bytes.join
1804
1805 iterable_of_bytes: object
1806 /
1807
1808 Concatenate any number of bytes objects.
1809
1810 The bytes whose method is called is inserted in between each pair.
1811
1812 The result is returned as a new bytes object.
1813
1814 Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1815 [clinic start generated code]*/
1816
1817 static PyObject *
bytes_join(PyBytesObject * self,PyObject * iterable_of_bytes)1818 bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1819 /*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
1820 {
1821 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1822 }
1823
1824 PyObject *
_PyBytes_Join(PyObject * sep,PyObject * x)1825 _PyBytes_Join(PyObject *sep, PyObject *x)
1826 {
1827 assert(sep != NULL && PyBytes_Check(sep));
1828 assert(x != NULL);
1829 return bytes_join((PyBytesObject*)sep, x);
1830 }
1831
1832 static PyObject *
bytes_find(PyBytesObject * self,PyObject * args)1833 bytes_find(PyBytesObject *self, PyObject *args)
1834 {
1835 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1836 }
1837
1838 static PyObject *
bytes_index(PyBytesObject * self,PyObject * args)1839 bytes_index(PyBytesObject *self, PyObject *args)
1840 {
1841 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1842 }
1843
1844
1845 static PyObject *
bytes_rfind(PyBytesObject * self,PyObject * args)1846 bytes_rfind(PyBytesObject *self, PyObject *args)
1847 {
1848 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1849 }
1850
1851
1852 static PyObject *
bytes_rindex(PyBytesObject * self,PyObject * args)1853 bytes_rindex(PyBytesObject *self, PyObject *args)
1854 {
1855 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1856 }
1857
1858
1859 Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject * self,int striptype,PyObject * sepobj)1860 do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
1861 {
1862 Py_buffer vsep;
1863 const char *s = PyBytes_AS_STRING(self);
1864 Py_ssize_t len = PyBytes_GET_SIZE(self);
1865 char *sep;
1866 Py_ssize_t seplen;
1867 Py_ssize_t i, j;
1868
1869 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
1870 return NULL;
1871 sep = vsep.buf;
1872 seplen = vsep.len;
1873
1874 i = 0;
1875 if (striptype != RIGHTSTRIP) {
1876 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1877 i++;
1878 }
1879 }
1880
1881 j = len;
1882 if (striptype != LEFTSTRIP) {
1883 do {
1884 j--;
1885 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1886 j++;
1887 }
1888
1889 PyBuffer_Release(&vsep);
1890
1891 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1892 Py_INCREF(self);
1893 return (PyObject*)self;
1894 }
1895 else
1896 return PyBytes_FromStringAndSize(s+i, j-i);
1897 }
1898
1899
1900 Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject * self,int striptype)1901 do_strip(PyBytesObject *self, int striptype)
1902 {
1903 const char *s = PyBytes_AS_STRING(self);
1904 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1905
1906 i = 0;
1907 if (striptype != RIGHTSTRIP) {
1908 while (i < len && Py_ISSPACE(s[i])) {
1909 i++;
1910 }
1911 }
1912
1913 j = len;
1914 if (striptype != LEFTSTRIP) {
1915 do {
1916 j--;
1917 } while (j >= i && Py_ISSPACE(s[j]));
1918 j++;
1919 }
1920
1921 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1922 Py_INCREF(self);
1923 return (PyObject*)self;
1924 }
1925 else
1926 return PyBytes_FromStringAndSize(s+i, j-i);
1927 }
1928
1929
1930 Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject * self,int striptype,PyObject * bytes)1931 do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
1932 {
1933 if (bytes != Py_None) {
1934 return do_xstrip(self, striptype, bytes);
1935 }
1936 return do_strip(self, striptype);
1937 }
1938
1939 /*[clinic input]
1940 bytes.strip
1941
1942 bytes: object = None
1943 /
1944
1945 Strip leading and trailing bytes contained in the argument.
1946
1947 If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1948 [clinic start generated code]*/
1949
1950 static PyObject *
bytes_strip_impl(PyBytesObject * self,PyObject * bytes)1951 bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
1952 /*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
1953 {
1954 return do_argstrip(self, BOTHSTRIP, bytes);
1955 }
1956
1957 /*[clinic input]
1958 bytes.lstrip
1959
1960 bytes: object = None
1961 /
1962
1963 Strip leading bytes contained in the argument.
1964
1965 If the argument is omitted or None, strip leading ASCII whitespace.
1966 [clinic start generated code]*/
1967
1968 static PyObject *
bytes_lstrip_impl(PyBytesObject * self,PyObject * bytes)1969 bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
1970 /*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
1971 {
1972 return do_argstrip(self, LEFTSTRIP, bytes);
1973 }
1974
1975 /*[clinic input]
1976 bytes.rstrip
1977
1978 bytes: object = None
1979 /
1980
1981 Strip trailing bytes contained in the argument.
1982
1983 If the argument is omitted or None, strip trailing ASCII whitespace.
1984 [clinic start generated code]*/
1985
1986 static PyObject *
bytes_rstrip_impl(PyBytesObject * self,PyObject * bytes)1987 bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
1988 /*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
1989 {
1990 return do_argstrip(self, RIGHTSTRIP, bytes);
1991 }
1992
1993
1994 static PyObject *
bytes_count(PyBytesObject * self,PyObject * args)1995 bytes_count(PyBytesObject *self, PyObject *args)
1996 {
1997 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1998 }
1999
2000
2001 /*[clinic input]
2002 bytes.translate
2003
2004 table: object
2005 Translation table, which must be a bytes object of length 256.
2006 /
2007 delete as deletechars: object(c_default="NULL") = b''
2008
2009 Return a copy with each character mapped by the given translation table.
2010
2011 All characters occurring in the optional argument delete are removed.
2012 The remaining characters are mapped through the given translation table.
2013 [clinic start generated code]*/
2014
2015 static PyObject *
bytes_translate_impl(PyBytesObject * self,PyObject * table,PyObject * deletechars)2016 bytes_translate_impl(PyBytesObject *self, PyObject *table,
2017 PyObject *deletechars)
2018 /*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
2019 {
2020 const char *input;
2021 char *output;
2022 Py_buffer table_view = {NULL, NULL};
2023 Py_buffer del_table_view = {NULL, NULL};
2024 const char *table_chars;
2025 Py_ssize_t i, c, changed = 0;
2026 PyObject *input_obj = (PyObject*)self;
2027 const char *output_start, *del_table_chars=NULL;
2028 Py_ssize_t inlen, tablen, dellen = 0;
2029 PyObject *result;
2030 int trans_table[256];
2031
2032 if (PyBytes_Check(table)) {
2033 table_chars = PyBytes_AS_STRING(table);
2034 tablen = PyBytes_GET_SIZE(table);
2035 }
2036 else if (table == Py_None) {
2037 table_chars = NULL;
2038 tablen = 256;
2039 }
2040 else {
2041 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
2042 return NULL;
2043 table_chars = table_view.buf;
2044 tablen = table_view.len;
2045 }
2046
2047 if (tablen != 256) {
2048 PyErr_SetString(PyExc_ValueError,
2049 "translation table must be 256 characters long");
2050 PyBuffer_Release(&table_view);
2051 return NULL;
2052 }
2053
2054 if (deletechars != NULL) {
2055 if (PyBytes_Check(deletechars)) {
2056 del_table_chars = PyBytes_AS_STRING(deletechars);
2057 dellen = PyBytes_GET_SIZE(deletechars);
2058 }
2059 else {
2060 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
2061 PyBuffer_Release(&table_view);
2062 return NULL;
2063 }
2064 del_table_chars = del_table_view.buf;
2065 dellen = del_table_view.len;
2066 }
2067 }
2068 else {
2069 del_table_chars = NULL;
2070 dellen = 0;
2071 }
2072
2073 inlen = PyBytes_GET_SIZE(input_obj);
2074 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2075 if (result == NULL) {
2076 PyBuffer_Release(&del_table_view);
2077 PyBuffer_Release(&table_view);
2078 return NULL;
2079 }
2080 output_start = output = PyBytes_AS_STRING(result);
2081 input = PyBytes_AS_STRING(input_obj);
2082
2083 if (dellen == 0 && table_chars != NULL) {
2084 /* If no deletions are required, use faster code */
2085 for (i = inlen; --i >= 0; ) {
2086 c = Py_CHARMASK(*input++);
2087 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
2088 changed = 1;
2089 }
2090 if (!changed && PyBytes_CheckExact(input_obj)) {
2091 Py_INCREF(input_obj);
2092 Py_DECREF(result);
2093 result = input_obj;
2094 }
2095 PyBuffer_Release(&del_table_view);
2096 PyBuffer_Release(&table_view);
2097 return result;
2098 }
2099
2100 if (table_chars == NULL) {
2101 for (i = 0; i < 256; i++)
2102 trans_table[i] = Py_CHARMASK(i);
2103 } else {
2104 for (i = 0; i < 256; i++)
2105 trans_table[i] = Py_CHARMASK(table_chars[i]);
2106 }
2107 PyBuffer_Release(&table_view);
2108
2109 for (i = 0; i < dellen; i++)
2110 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
2111 PyBuffer_Release(&del_table_view);
2112
2113 for (i = inlen; --i >= 0; ) {
2114 c = Py_CHARMASK(*input++);
2115 if (trans_table[c] != -1)
2116 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2117 continue;
2118 changed = 1;
2119 }
2120 if (!changed && PyBytes_CheckExact(input_obj)) {
2121 Py_DECREF(result);
2122 Py_INCREF(input_obj);
2123 return input_obj;
2124 }
2125 /* Fix the size of the resulting string */
2126 if (inlen > 0)
2127 _PyBytes_Resize(&result, output - output_start);
2128 return result;
2129 }
2130
2131
2132 /*[clinic input]
2133
2134 @staticmethod
2135 bytes.maketrans
2136
2137 frm: Py_buffer
2138 to: Py_buffer
2139 /
2140
2141 Return a translation table useable for the bytes or bytearray translate method.
2142
2143 The returned table will be one where each byte in frm is mapped to the byte at
2144 the same position in to.
2145
2146 The bytes objects frm and to must be of the same length.
2147 [clinic start generated code]*/
2148
2149 static PyObject *
bytes_maketrans_impl(Py_buffer * frm,Py_buffer * to)2150 bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2151 /*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
2152 {
2153 return _Py_bytes_maketrans(frm, to);
2154 }
2155
2156
2157 /*[clinic input]
2158 bytes.replace
2159
2160 old: Py_buffer
2161 new: Py_buffer
2162 count: Py_ssize_t = -1
2163 Maximum number of occurrences to replace.
2164 -1 (the default value) means replace all occurrences.
2165 /
2166
2167 Return a copy with all occurrences of substring old replaced by new.
2168
2169 If the optional argument count is given, only the first count occurrences are
2170 replaced.
2171 [clinic start generated code]*/
2172
2173 static PyObject *
bytes_replace_impl(PyBytesObject * self,Py_buffer * old,Py_buffer * new,Py_ssize_t count)2174 bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2175 Py_ssize_t count)
2176 /*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
2177 {
2178 return stringlib_replace((PyObject *)self,
2179 (const char *)old->buf, old->len,
2180 (const char *)new->buf, new->len, count);
2181 }
2182
2183 /** End DALKE **/
2184
2185 /*[clinic input]
2186 bytes.removeprefix as bytes_removeprefix
2187
2188 prefix: Py_buffer
2189 /
2190
2191 Return a bytes object with the given prefix string removed if present.
2192
2193 If the bytes starts with the prefix string, return bytes[len(prefix):].
2194 Otherwise, return a copy of the original bytes.
2195 [clinic start generated code]*/
2196
2197 static PyObject *
bytes_removeprefix_impl(PyBytesObject * self,Py_buffer * prefix)2198 bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2199 /*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2200 {
2201 const char *self_start = PyBytes_AS_STRING(self);
2202 Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2203 const char *prefix_start = prefix->buf;
2204 Py_ssize_t prefix_len = prefix->len;
2205
2206 if (self_len >= prefix_len
2207 && prefix_len > 0
2208 && memcmp(self_start, prefix_start, prefix_len) == 0)
2209 {
2210 return PyBytes_FromStringAndSize(self_start + prefix_len,
2211 self_len - prefix_len);
2212 }
2213
2214 if (PyBytes_CheckExact(self)) {
2215 Py_INCREF(self);
2216 return (PyObject *)self;
2217 }
2218
2219 return PyBytes_FromStringAndSize(self_start, self_len);
2220 }
2221
2222 /*[clinic input]
2223 bytes.removesuffix as bytes_removesuffix
2224
2225 suffix: Py_buffer
2226 /
2227
2228 Return a bytes object with the given suffix string removed if present.
2229
2230 If the bytes ends with the suffix string and that suffix is not empty,
2231 return bytes[:-len(prefix)]. Otherwise, return a copy of the original
2232 bytes.
2233 [clinic start generated code]*/
2234
2235 static PyObject *
bytes_removesuffix_impl(PyBytesObject * self,Py_buffer * suffix)2236 bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2237 /*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2238 {
2239 const char *self_start = PyBytes_AS_STRING(self);
2240 Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2241 const char *suffix_start = suffix->buf;
2242 Py_ssize_t suffix_len = suffix->len;
2243
2244 if (self_len >= suffix_len
2245 && suffix_len > 0
2246 && memcmp(self_start + self_len - suffix_len,
2247 suffix_start, suffix_len) == 0)
2248 {
2249 return PyBytes_FromStringAndSize(self_start,
2250 self_len - suffix_len);
2251 }
2252
2253 if (PyBytes_CheckExact(self)) {
2254 Py_INCREF(self);
2255 return (PyObject *)self;
2256 }
2257
2258 return PyBytes_FromStringAndSize(self_start, self_len);
2259 }
2260
2261 static PyObject *
bytes_startswith(PyBytesObject * self,PyObject * args)2262 bytes_startswith(PyBytesObject *self, PyObject *args)
2263 {
2264 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2265 }
2266
2267 static PyObject *
bytes_endswith(PyBytesObject * self,PyObject * args)2268 bytes_endswith(PyBytesObject *self, PyObject *args)
2269 {
2270 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2271 }
2272
2273
2274 /*[clinic input]
2275 bytes.decode
2276
2277 encoding: str(c_default="NULL") = 'utf-8'
2278 The encoding with which to decode the bytes.
2279 errors: str(c_default="NULL") = 'strict'
2280 The error handling scheme to use for the handling of decoding errors.
2281 The default is 'strict' meaning that decoding errors raise a
2282 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2283 as well as any other name registered with codecs.register_error that
2284 can handle UnicodeDecodeErrors.
2285
2286 Decode the bytes using the codec registered for encoding.
2287 [clinic start generated code]*/
2288
2289 static PyObject *
bytes_decode_impl(PyBytesObject * self,const char * encoding,const char * errors)2290 bytes_decode_impl(PyBytesObject *self, const char *encoding,
2291 const char *errors)
2292 /*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2293 {
2294 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2295 }
2296
2297
2298 /*[clinic input]
2299 bytes.splitlines
2300
2301 keepends: bool(accept={int}) = False
2302
2303 Return a list of the lines in the bytes, breaking at line boundaries.
2304
2305 Line breaks are not included in the resulting list unless keepends is given and
2306 true.
2307 [clinic start generated code]*/
2308
2309 static PyObject *
bytes_splitlines_impl(PyBytesObject * self,int keepends)2310 bytes_splitlines_impl(PyBytesObject *self, int keepends)
2311 /*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
2312 {
2313 return stringlib_splitlines(
2314 (PyObject*) self, PyBytes_AS_STRING(self),
2315 PyBytes_GET_SIZE(self), keepends
2316 );
2317 }
2318
2319 /*[clinic input]
2320 @classmethod
2321 bytes.fromhex
2322
2323 string: unicode
2324 /
2325
2326 Create a bytes object from a string of hexadecimal numbers.
2327
2328 Spaces between two numbers are accepted.
2329 Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2330 [clinic start generated code]*/
2331
2332 static PyObject *
bytes_fromhex_impl(PyTypeObject * type,PyObject * string)2333 bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2334 /*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
2335 {
2336 PyObject *result = _PyBytes_FromHex(string, 0);
2337 if (type != &PyBytes_Type && result != NULL) {
2338 Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
2339 }
2340 return result;
2341 }
2342
2343 PyObject*
_PyBytes_FromHex(PyObject * string,int use_bytearray)2344 _PyBytes_FromHex(PyObject *string, int use_bytearray)
2345 {
2346 char *buf;
2347 Py_ssize_t hexlen, invalid_char;
2348 unsigned int top, bot;
2349 const Py_UCS1 *str, *end;
2350 _PyBytesWriter writer;
2351
2352 _PyBytesWriter_Init(&writer);
2353 writer.use_bytearray = use_bytearray;
2354
2355 assert(PyUnicode_Check(string));
2356 if (PyUnicode_READY(string))
2357 return NULL;
2358 hexlen = PyUnicode_GET_LENGTH(string);
2359
2360 if (!PyUnicode_IS_ASCII(string)) {
2361 const void *data = PyUnicode_DATA(string);
2362 unsigned int kind = PyUnicode_KIND(string);
2363 Py_ssize_t i;
2364
2365 /* search for the first non-ASCII character */
2366 for (i = 0; i < hexlen; i++) {
2367 if (PyUnicode_READ(kind, data, i) >= 128)
2368 break;
2369 }
2370 invalid_char = i;
2371 goto error;
2372 }
2373
2374 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2375 str = PyUnicode_1BYTE_DATA(string);
2376
2377 /* This overestimates if there are spaces */
2378 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2379 if (buf == NULL)
2380 return NULL;
2381
2382 end = str + hexlen;
2383 while (str < end) {
2384 /* skip over spaces in the input */
2385 if (Py_ISSPACE(*str)) {
2386 do {
2387 str++;
2388 } while (Py_ISSPACE(*str));
2389 if (str >= end)
2390 break;
2391 }
2392
2393 top = _PyLong_DigitValue[*str];
2394 if (top >= 16) {
2395 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2396 goto error;
2397 }
2398 str++;
2399
2400 bot = _PyLong_DigitValue[*str];
2401 if (bot >= 16) {
2402 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2403 goto error;
2404 }
2405 str++;
2406
2407 *buf++ = (unsigned char)((top << 4) + bot);
2408 }
2409
2410 return _PyBytesWriter_Finish(&writer, buf);
2411
2412 error:
2413 PyErr_Format(PyExc_ValueError,
2414 "non-hexadecimal number found in "
2415 "fromhex() arg at position %zd", invalid_char);
2416 _PyBytesWriter_Dealloc(&writer);
2417 return NULL;
2418 }
2419
2420 /*[clinic input]
2421 bytes.hex
2422
2423 sep: object = NULL
2424 An optional single character or byte to separate hex bytes.
2425 bytes_per_sep: int = 1
2426 How many bytes between separators. Positive values count from the
2427 right, negative values count from the left.
2428
2429 Create a str of hexadecimal numbers from a bytes object.
2430
2431 Example:
2432 >>> value = b'\xb9\x01\xef'
2433 >>> value.hex()
2434 'b901ef'
2435 >>> value.hex(':')
2436 'b9:01:ef'
2437 >>> value.hex(':', 2)
2438 'b9:01ef'
2439 >>> value.hex(':', -2)
2440 'b901:ef'
2441 [clinic start generated code]*/
2442
2443 static PyObject *
bytes_hex_impl(PyBytesObject * self,PyObject * sep,int bytes_per_sep)2444 bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2445 /*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/
2446 {
2447 const char *argbuf = PyBytes_AS_STRING(self);
2448 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2449 return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
2450 }
2451
2452 static PyObject *
bytes_getnewargs(PyBytesObject * v,PyObject * Py_UNUSED (ignored))2453 bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
2454 {
2455 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
2456 }
2457
2458
2459 static PyMethodDef
2460 bytes_methods[] = {
2461 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2462 {"capitalize", stringlib_capitalize, METH_NOARGS,
2463 _Py_capitalize__doc__},
2464 STRINGLIB_CENTER_METHODDEF
2465 {"count", (PyCFunction)bytes_count, METH_VARARGS,
2466 _Py_count__doc__},
2467 BYTES_DECODE_METHODDEF
2468 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2469 _Py_endswith__doc__},
2470 STRINGLIB_EXPANDTABS_METHODDEF
2471 {"find", (PyCFunction)bytes_find, METH_VARARGS,
2472 _Py_find__doc__},
2473 BYTES_FROMHEX_METHODDEF
2474 BYTES_HEX_METHODDEF
2475 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
2476 {"isalnum", stringlib_isalnum, METH_NOARGS,
2477 _Py_isalnum__doc__},
2478 {"isalpha", stringlib_isalpha, METH_NOARGS,
2479 _Py_isalpha__doc__},
2480 {"isascii", stringlib_isascii, METH_NOARGS,
2481 _Py_isascii__doc__},
2482 {"isdigit", stringlib_isdigit, METH_NOARGS,
2483 _Py_isdigit__doc__},
2484 {"islower", stringlib_islower, METH_NOARGS,
2485 _Py_islower__doc__},
2486 {"isspace", stringlib_isspace, METH_NOARGS,
2487 _Py_isspace__doc__},
2488 {"istitle", stringlib_istitle, METH_NOARGS,
2489 _Py_istitle__doc__},
2490 {"isupper", stringlib_isupper, METH_NOARGS,
2491 _Py_isupper__doc__},
2492 BYTES_JOIN_METHODDEF
2493 STRINGLIB_LJUST_METHODDEF
2494 {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2495 BYTES_LSTRIP_METHODDEF
2496 BYTES_MAKETRANS_METHODDEF
2497 BYTES_PARTITION_METHODDEF
2498 BYTES_REPLACE_METHODDEF
2499 BYTES_REMOVEPREFIX_METHODDEF
2500 BYTES_REMOVESUFFIX_METHODDEF
2501 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2502 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
2503 STRINGLIB_RJUST_METHODDEF
2504 BYTES_RPARTITION_METHODDEF
2505 BYTES_RSPLIT_METHODDEF
2506 BYTES_RSTRIP_METHODDEF
2507 BYTES_SPLIT_METHODDEF
2508 BYTES_SPLITLINES_METHODDEF
2509 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2510 _Py_startswith__doc__},
2511 BYTES_STRIP_METHODDEF
2512 {"swapcase", stringlib_swapcase, METH_NOARGS,
2513 _Py_swapcase__doc__},
2514 {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
2515 BYTES_TRANSLATE_METHODDEF
2516 {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2517 STRINGLIB_ZFILL_METHODDEF
2518 {NULL, NULL} /* sentinel */
2519 };
2520
2521 static PyObject *
bytes_mod(PyObject * self,PyObject * arg)2522 bytes_mod(PyObject *self, PyObject *arg)
2523 {
2524 if (!PyBytes_Check(self)) {
2525 Py_RETURN_NOTIMPLEMENTED;
2526 }
2527 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2528 arg, 0);
2529 }
2530
2531 static PyNumberMethods bytes_as_number = {
2532 0, /*nb_add*/
2533 0, /*nb_subtract*/
2534 0, /*nb_multiply*/
2535 bytes_mod, /*nb_remainder*/
2536 };
2537
2538 static PyObject *
2539 bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2540
2541 static PyObject *
bytes_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2542 bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2543 {
2544 PyObject *x = NULL;
2545 const char *encoding = NULL;
2546 const char *errors = NULL;
2547 PyObject *new = NULL;
2548 PyObject *func;
2549 Py_ssize_t size;
2550 static char *kwlist[] = {"source", "encoding", "errors", 0};
2551
2552 if (type != &PyBytes_Type)
2553 return bytes_subtype_new(type, args, kwds);
2554 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2555 &encoding, &errors))
2556 return NULL;
2557 if (x == NULL) {
2558 if (encoding != NULL || errors != NULL) {
2559 PyErr_SetString(PyExc_TypeError,
2560 encoding != NULL ?
2561 "encoding without a string argument" :
2562 "errors without a string argument");
2563 return NULL;
2564 }
2565 return PyBytes_FromStringAndSize(NULL, 0);
2566 }
2567
2568 if (encoding != NULL) {
2569 /* Encode via the codec registry */
2570 if (!PyUnicode_Check(x)) {
2571 PyErr_SetString(PyExc_TypeError,
2572 "encoding without a string argument");
2573 return NULL;
2574 }
2575 new = PyUnicode_AsEncodedString(x, encoding, errors);
2576 if (new == NULL)
2577 return NULL;
2578 assert(PyBytes_Check(new));
2579 return new;
2580 }
2581
2582 if (errors != NULL) {
2583 PyErr_SetString(PyExc_TypeError,
2584 PyUnicode_Check(x) ?
2585 "string argument without an encoding" :
2586 "errors without a string argument");
2587 return NULL;
2588 }
2589
2590 /* We'd like to call PyObject_Bytes here, but we need to check for an
2591 integer argument before deferring to PyBytes_FromObject, something
2592 PyObject_Bytes doesn't do. */
2593 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2594 if (func != NULL) {
2595 new = _PyObject_CallNoArg(func);
2596 Py_DECREF(func);
2597 if (new == NULL)
2598 return NULL;
2599 if (!PyBytes_Check(new)) {
2600 PyErr_Format(PyExc_TypeError,
2601 "__bytes__ returned non-bytes (type %.200s)",
2602 Py_TYPE(new)->tp_name);
2603 Py_DECREF(new);
2604 return NULL;
2605 }
2606 return new;
2607 }
2608 else if (PyErr_Occurred())
2609 return NULL;
2610
2611 if (PyUnicode_Check(x)) {
2612 PyErr_SetString(PyExc_TypeError,
2613 "string argument without an encoding");
2614 return NULL;
2615 }
2616 /* Is it an integer? */
2617 if (_PyIndex_Check(x)) {
2618 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2619 if (size == -1 && PyErr_Occurred()) {
2620 if (!PyErr_ExceptionMatches(PyExc_TypeError))
2621 return NULL;
2622 PyErr_Clear(); /* fall through */
2623 }
2624 else {
2625 if (size < 0) {
2626 PyErr_SetString(PyExc_ValueError, "negative count");
2627 return NULL;
2628 }
2629 new = _PyBytes_FromSize(size, 1);
2630 if (new == NULL)
2631 return NULL;
2632 return new;
2633 }
2634 }
2635
2636 return PyBytes_FromObject(x);
2637 }
2638
2639 static PyObject*
_PyBytes_FromBuffer(PyObject * x)2640 _PyBytes_FromBuffer(PyObject *x)
2641 {
2642 PyObject *new;
2643 Py_buffer view;
2644
2645 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2646 return NULL;
2647
2648 new = PyBytes_FromStringAndSize(NULL, view.len);
2649 if (!new)
2650 goto fail;
2651 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2652 &view, view.len, 'C') < 0)
2653 goto fail;
2654 PyBuffer_Release(&view);
2655 return new;
2656
2657 fail:
2658 Py_XDECREF(new);
2659 PyBuffer_Release(&view);
2660 return NULL;
2661 }
2662
2663 static PyObject*
_PyBytes_FromList(PyObject * x)2664 _PyBytes_FromList(PyObject *x)
2665 {
2666 Py_ssize_t i, size = PyList_GET_SIZE(x);
2667 Py_ssize_t value;
2668 char *str;
2669 PyObject *item;
2670 _PyBytesWriter writer;
2671
2672 _PyBytesWriter_Init(&writer);
2673 str = _PyBytesWriter_Alloc(&writer, size);
2674 if (str == NULL)
2675 return NULL;
2676 writer.overallocate = 1;
2677 size = writer.allocated;
2678
2679 for (i = 0; i < PyList_GET_SIZE(x); i++) {
2680 item = PyList_GET_ITEM(x, i);
2681 Py_INCREF(item);
2682 value = PyNumber_AsSsize_t(item, NULL);
2683 Py_DECREF(item);
2684 if (value == -1 && PyErr_Occurred())
2685 goto error;
2686
2687 if (value < 0 || value >= 256) {
2688 PyErr_SetString(PyExc_ValueError,
2689 "bytes must be in range(0, 256)");
2690 goto error;
2691 }
2692
2693 if (i >= size) {
2694 str = _PyBytesWriter_Resize(&writer, str, size+1);
2695 if (str == NULL)
2696 return NULL;
2697 size = writer.allocated;
2698 }
2699 *str++ = (char) value;
2700 }
2701 return _PyBytesWriter_Finish(&writer, str);
2702
2703 error:
2704 _PyBytesWriter_Dealloc(&writer);
2705 return NULL;
2706 }
2707
2708 static PyObject*
_PyBytes_FromTuple(PyObject * x)2709 _PyBytes_FromTuple(PyObject *x)
2710 {
2711 PyObject *bytes;
2712 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2713 Py_ssize_t value;
2714 char *str;
2715 PyObject *item;
2716
2717 bytes = PyBytes_FromStringAndSize(NULL, size);
2718 if (bytes == NULL)
2719 return NULL;
2720 str = ((PyBytesObject *)bytes)->ob_sval;
2721
2722 for (i = 0; i < size; i++) {
2723 item = PyTuple_GET_ITEM(x, i);
2724 value = PyNumber_AsSsize_t(item, NULL);
2725 if (value == -1 && PyErr_Occurred())
2726 goto error;
2727
2728 if (value < 0 || value >= 256) {
2729 PyErr_SetString(PyExc_ValueError,
2730 "bytes must be in range(0, 256)");
2731 goto error;
2732 }
2733 *str++ = (char) value;
2734 }
2735 return bytes;
2736
2737 error:
2738 Py_DECREF(bytes);
2739 return NULL;
2740 }
2741
2742 static PyObject *
_PyBytes_FromIterator(PyObject * it,PyObject * x)2743 _PyBytes_FromIterator(PyObject *it, PyObject *x)
2744 {
2745 char *str;
2746 Py_ssize_t i, size;
2747 _PyBytesWriter writer;
2748
2749 /* For iterator version, create a string object and resize as needed */
2750 size = PyObject_LengthHint(x, 64);
2751 if (size == -1 && PyErr_Occurred())
2752 return NULL;
2753
2754 _PyBytesWriter_Init(&writer);
2755 str = _PyBytesWriter_Alloc(&writer, size);
2756 if (str == NULL)
2757 return NULL;
2758 writer.overallocate = 1;
2759 size = writer.allocated;
2760
2761 /* Run the iterator to exhaustion */
2762 for (i = 0; ; i++) {
2763 PyObject *item;
2764 Py_ssize_t value;
2765
2766 /* Get the next item */
2767 item = PyIter_Next(it);
2768 if (item == NULL) {
2769 if (PyErr_Occurred())
2770 goto error;
2771 break;
2772 }
2773
2774 /* Interpret it as an int (__index__) */
2775 value = PyNumber_AsSsize_t(item, NULL);
2776 Py_DECREF(item);
2777 if (value == -1 && PyErr_Occurred())
2778 goto error;
2779
2780 /* Range check */
2781 if (value < 0 || value >= 256) {
2782 PyErr_SetString(PyExc_ValueError,
2783 "bytes must be in range(0, 256)");
2784 goto error;
2785 }
2786
2787 /* Append the byte */
2788 if (i >= size) {
2789 str = _PyBytesWriter_Resize(&writer, str, size+1);
2790 if (str == NULL)
2791 return NULL;
2792 size = writer.allocated;
2793 }
2794 *str++ = (char) value;
2795 }
2796
2797 return _PyBytesWriter_Finish(&writer, str);
2798
2799 error:
2800 _PyBytesWriter_Dealloc(&writer);
2801 return NULL;
2802 }
2803
2804 PyObject *
PyBytes_FromObject(PyObject * x)2805 PyBytes_FromObject(PyObject *x)
2806 {
2807 PyObject *it, *result;
2808
2809 if (x == NULL) {
2810 PyErr_BadInternalCall();
2811 return NULL;
2812 }
2813
2814 if (PyBytes_CheckExact(x)) {
2815 Py_INCREF(x);
2816 return x;
2817 }
2818
2819 /* Use the modern buffer interface */
2820 if (PyObject_CheckBuffer(x))
2821 return _PyBytes_FromBuffer(x);
2822
2823 if (PyList_CheckExact(x))
2824 return _PyBytes_FromList(x);
2825
2826 if (PyTuple_CheckExact(x))
2827 return _PyBytes_FromTuple(x);
2828
2829 if (!PyUnicode_Check(x)) {
2830 it = PyObject_GetIter(x);
2831 if (it != NULL) {
2832 result = _PyBytes_FromIterator(it, x);
2833 Py_DECREF(it);
2834 return result;
2835 }
2836 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2837 return NULL;
2838 }
2839 }
2840
2841 PyErr_Format(PyExc_TypeError,
2842 "cannot convert '%.200s' object to bytes",
2843 Py_TYPE(x)->tp_name);
2844 return NULL;
2845 }
2846
2847 static PyObject *
bytes_subtype_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2848 bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2849 {
2850 PyObject *tmp, *pnew;
2851 Py_ssize_t n;
2852
2853 assert(PyType_IsSubtype(type, &PyBytes_Type));
2854 tmp = bytes_new(&PyBytes_Type, args, kwds);
2855 if (tmp == NULL)
2856 return NULL;
2857 assert(PyBytes_Check(tmp));
2858 n = PyBytes_GET_SIZE(tmp);
2859 pnew = type->tp_alloc(type, n);
2860 if (pnew != NULL) {
2861 memcpy(PyBytes_AS_STRING(pnew),
2862 PyBytes_AS_STRING(tmp), n+1);
2863 ((PyBytesObject *)pnew)->ob_shash =
2864 ((PyBytesObject *)tmp)->ob_shash;
2865 }
2866 Py_DECREF(tmp);
2867 return pnew;
2868 }
2869
2870 PyDoc_STRVAR(bytes_doc,
2871 "bytes(iterable_of_ints) -> bytes\n\
2872 bytes(string, encoding[, errors]) -> bytes\n\
2873 bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
2874 bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2875 bytes() -> empty bytes object\n\
2876 \n\
2877 Construct an immutable array of bytes from:\n\
2878 - an iterable yielding integers in range(256)\n\
2879 - a text string encoded using the specified encoding\n\
2880 - any object implementing the buffer API.\n\
2881 - an integer");
2882
2883 static PyObject *bytes_iter(PyObject *seq);
2884
2885 PyTypeObject PyBytes_Type = {
2886 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2887 "bytes",
2888 PyBytesObject_SIZE,
2889 sizeof(char),
2890 0, /* tp_dealloc */
2891 0, /* tp_vectorcall_offset */
2892 0, /* tp_getattr */
2893 0, /* tp_setattr */
2894 0, /* tp_as_async */
2895 (reprfunc)bytes_repr, /* tp_repr */
2896 &bytes_as_number, /* tp_as_number */
2897 &bytes_as_sequence, /* tp_as_sequence */
2898 &bytes_as_mapping, /* tp_as_mapping */
2899 (hashfunc)bytes_hash, /* tp_hash */
2900 0, /* tp_call */
2901 bytes_str, /* tp_str */
2902 PyObject_GenericGetAttr, /* tp_getattro */
2903 0, /* tp_setattro */
2904 &bytes_as_buffer, /* tp_as_buffer */
2905 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2906 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2907 bytes_doc, /* tp_doc */
2908 0, /* tp_traverse */
2909 0, /* tp_clear */
2910 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2911 0, /* tp_weaklistoffset */
2912 bytes_iter, /* tp_iter */
2913 0, /* tp_iternext */
2914 bytes_methods, /* tp_methods */
2915 0, /* tp_members */
2916 0, /* tp_getset */
2917 &PyBaseObject_Type, /* tp_base */
2918 0, /* tp_dict */
2919 0, /* tp_descr_get */
2920 0, /* tp_descr_set */
2921 0, /* tp_dictoffset */
2922 0, /* tp_init */
2923 0, /* tp_alloc */
2924 bytes_new, /* tp_new */
2925 PyObject_Del, /* tp_free */
2926 };
2927
2928 void
PyBytes_Concat(PyObject ** pv,PyObject * w)2929 PyBytes_Concat(PyObject **pv, PyObject *w)
2930 {
2931 assert(pv != NULL);
2932 if (*pv == NULL)
2933 return;
2934 if (w == NULL) {
2935 Py_CLEAR(*pv);
2936 return;
2937 }
2938
2939 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2940 /* Only one reference, so we can resize in place */
2941 Py_ssize_t oldsize;
2942 Py_buffer wb;
2943
2944 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
2945 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2946 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2947 Py_CLEAR(*pv);
2948 return;
2949 }
2950
2951 oldsize = PyBytes_GET_SIZE(*pv);
2952 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2953 PyErr_NoMemory();
2954 goto error;
2955 }
2956 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2957 goto error;
2958
2959 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2960 PyBuffer_Release(&wb);
2961 return;
2962
2963 error:
2964 PyBuffer_Release(&wb);
2965 Py_CLEAR(*pv);
2966 return;
2967 }
2968
2969 else {
2970 /* Multiple references, need to create new object */
2971 PyObject *v;
2972 v = bytes_concat(*pv, w);
2973 Py_SETREF(*pv, v);
2974 }
2975 }
2976
2977 void
PyBytes_ConcatAndDel(PyObject ** pv,PyObject * w)2978 PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
2979 {
2980 PyBytes_Concat(pv, w);
2981 Py_XDECREF(w);
2982 }
2983
2984
2985 /* The following function breaks the notion that bytes are immutable:
2986 it changes the size of a bytes object. We get away with this only if there
2987 is only one module referencing the object. You can also think of it
2988 as creating a new bytes object and destroying the old one, only
2989 more efficiently. In any case, don't use this if the bytes object may
2990 already be known to some other part of the code...
2991 Note that if there's not enough memory to resize the bytes object, the
2992 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
2993 memory" exception is set, and -1 is returned. Else (on success) 0 is
2994 returned, and the value in *pv may or may not be the same as on input.
2995 As always, an extra byte is allocated for a trailing \0 byte (newsize
2996 does *not* include that), and a trailing \0 byte is stored.
2997 */
2998
2999 int
_PyBytes_Resize(PyObject ** pv,Py_ssize_t newsize)3000 _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3001 {
3002 PyObject *v;
3003 PyBytesObject *sv;
3004 v = *pv;
3005 if (!PyBytes_Check(v) || newsize < 0) {
3006 goto error;
3007 }
3008 if (Py_SIZE(v) == newsize) {
3009 /* return early if newsize equals to v->ob_size */
3010 return 0;
3011 }
3012 if (Py_SIZE(v) == 0) {
3013 if (newsize == 0) {
3014 return 0;
3015 }
3016 *pv = _PyBytes_FromSize(newsize, 0);
3017 Py_DECREF(v);
3018 return (*pv == NULL) ? -1 : 0;
3019 }
3020 if (Py_REFCNT(v) != 1) {
3021 goto error;
3022 }
3023 if (newsize == 0) {
3024 *pv = _PyBytes_FromSize(0, 0);
3025 Py_DECREF(v);
3026 return (*pv == NULL) ? -1 : 0;
3027 }
3028 /* XXX UNREF/NEWREF interface should be more symmetrical */
3029 #ifdef Py_REF_DEBUG
3030 _Py_RefTotal--;
3031 #endif
3032 #ifdef Py_TRACE_REFS
3033 _Py_ForgetReference(v);
3034 #endif
3035 *pv = (PyObject *)
3036 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
3037 if (*pv == NULL) {
3038 PyObject_Del(v);
3039 PyErr_NoMemory();
3040 return -1;
3041 }
3042 _Py_NewReference(*pv);
3043 sv = (PyBytesObject *) *pv;
3044 Py_SET_SIZE(sv, newsize);
3045 sv->ob_sval[newsize] = '\0';
3046 sv->ob_shash = -1; /* invalidate cached hash value */
3047 return 0;
3048 error:
3049 *pv = 0;
3050 Py_DECREF(v);
3051 PyErr_BadInternalCall();
3052 return -1;
3053 }
3054
3055 void
_PyBytes_Fini(void)3056 _PyBytes_Fini(void)
3057 {
3058 int i;
3059 for (i = 0; i < UCHAR_MAX + 1; i++)
3060 Py_CLEAR(characters[i]);
3061 Py_CLEAR(nullstring);
3062 }
3063
3064 /*********************** Bytes Iterator ****************************/
3065
3066 typedef struct {
3067 PyObject_HEAD
3068 Py_ssize_t it_index;
3069 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3070 } striterobject;
3071
3072 static void
striter_dealloc(striterobject * it)3073 striter_dealloc(striterobject *it)
3074 {
3075 _PyObject_GC_UNTRACK(it);
3076 Py_XDECREF(it->it_seq);
3077 PyObject_GC_Del(it);
3078 }
3079
3080 static int
striter_traverse(striterobject * it,visitproc visit,void * arg)3081 striter_traverse(striterobject *it, visitproc visit, void *arg)
3082 {
3083 Py_VISIT(it->it_seq);
3084 return 0;
3085 }
3086
3087 static PyObject *
striter_next(striterobject * it)3088 striter_next(striterobject *it)
3089 {
3090 PyBytesObject *seq;
3091 PyObject *item;
3092
3093 assert(it != NULL);
3094 seq = it->it_seq;
3095 if (seq == NULL)
3096 return NULL;
3097 assert(PyBytes_Check(seq));
3098
3099 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3100 item = PyLong_FromLong(
3101 (unsigned char)seq->ob_sval[it->it_index]);
3102 if (item != NULL)
3103 ++it->it_index;
3104 return item;
3105 }
3106
3107 it->it_seq = NULL;
3108 Py_DECREF(seq);
3109 return NULL;
3110 }
3111
3112 static PyObject *
striter_len(striterobject * it,PyObject * Py_UNUSED (ignored))3113 striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
3114 {
3115 Py_ssize_t len = 0;
3116 if (it->it_seq)
3117 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3118 return PyLong_FromSsize_t(len);
3119 }
3120
3121 PyDoc_STRVAR(length_hint_doc,
3122 "Private method returning an estimate of len(list(it)).");
3123
3124 static PyObject *
striter_reduce(striterobject * it,PyObject * Py_UNUSED (ignored))3125 striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
3126 {
3127 _Py_IDENTIFIER(iter);
3128 if (it->it_seq != NULL) {
3129 return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
3130 it->it_seq, it->it_index);
3131 } else {
3132 return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
3133 }
3134 }
3135
3136 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3137
3138 static PyObject *
striter_setstate(striterobject * it,PyObject * state)3139 striter_setstate(striterobject *it, PyObject *state)
3140 {
3141 Py_ssize_t index = PyLong_AsSsize_t(state);
3142 if (index == -1 && PyErr_Occurred())
3143 return NULL;
3144 if (it->it_seq != NULL) {
3145 if (index < 0)
3146 index = 0;
3147 else if (index > PyBytes_GET_SIZE(it->it_seq))
3148 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3149 it->it_index = index;
3150 }
3151 Py_RETURN_NONE;
3152 }
3153
3154 PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3155
3156 static PyMethodDef striter_methods[] = {
3157 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3158 length_hint_doc},
3159 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3160 reduce_doc},
3161 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3162 setstate_doc},
3163 {NULL, NULL} /* sentinel */
3164 };
3165
3166 PyTypeObject PyBytesIter_Type = {
3167 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3168 "bytes_iterator", /* tp_name */
3169 sizeof(striterobject), /* tp_basicsize */
3170 0, /* tp_itemsize */
3171 /* methods */
3172 (destructor)striter_dealloc, /* tp_dealloc */
3173 0, /* tp_vectorcall_offset */
3174 0, /* tp_getattr */
3175 0, /* tp_setattr */
3176 0, /* tp_as_async */
3177 0, /* tp_repr */
3178 0, /* tp_as_number */
3179 0, /* tp_as_sequence */
3180 0, /* tp_as_mapping */
3181 0, /* tp_hash */
3182 0, /* tp_call */
3183 0, /* tp_str */
3184 PyObject_GenericGetAttr, /* tp_getattro */
3185 0, /* tp_setattro */
3186 0, /* tp_as_buffer */
3187 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3188 0, /* tp_doc */
3189 (traverseproc)striter_traverse, /* tp_traverse */
3190 0, /* tp_clear */
3191 0, /* tp_richcompare */
3192 0, /* tp_weaklistoffset */
3193 PyObject_SelfIter, /* tp_iter */
3194 (iternextfunc)striter_next, /* tp_iternext */
3195 striter_methods, /* tp_methods */
3196 0,
3197 };
3198
3199 static PyObject *
bytes_iter(PyObject * seq)3200 bytes_iter(PyObject *seq)
3201 {
3202 striterobject *it;
3203
3204 if (!PyBytes_Check(seq)) {
3205 PyErr_BadInternalCall();
3206 return NULL;
3207 }
3208 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3209 if (it == NULL)
3210 return NULL;
3211 it->it_index = 0;
3212 Py_INCREF(seq);
3213 it->it_seq = (PyBytesObject *)seq;
3214 _PyObject_GC_TRACK(it);
3215 return (PyObject *)it;
3216 }
3217
3218
3219 /* _PyBytesWriter API */
3220
3221 #ifdef MS_WINDOWS
3222 /* On Windows, overallocate by 50% is the best factor */
3223 # define OVERALLOCATE_FACTOR 2
3224 #else
3225 /* On Linux, overallocate by 25% is the best factor */
3226 # define OVERALLOCATE_FACTOR 4
3227 #endif
3228
3229 void
_PyBytesWriter_Init(_PyBytesWriter * writer)3230 _PyBytesWriter_Init(_PyBytesWriter *writer)
3231 {
3232 /* Set all attributes before small_buffer to 0 */
3233 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
3234 #ifndef NDEBUG
3235 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3236 sizeof(writer->small_buffer));
3237 #endif
3238 }
3239
3240 void
_PyBytesWriter_Dealloc(_PyBytesWriter * writer)3241 _PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3242 {
3243 Py_CLEAR(writer->buffer);
3244 }
3245
3246 Py_LOCAL_INLINE(char*)
_PyBytesWriter_AsString(_PyBytesWriter * writer)3247 _PyBytesWriter_AsString(_PyBytesWriter *writer)
3248 {
3249 if (writer->use_small_buffer) {
3250 assert(writer->buffer == NULL);
3251 return writer->small_buffer;
3252 }
3253 else if (writer->use_bytearray) {
3254 assert(writer->buffer != NULL);
3255 return PyByteArray_AS_STRING(writer->buffer);
3256 }
3257 else {
3258 assert(writer->buffer != NULL);
3259 return PyBytes_AS_STRING(writer->buffer);
3260 }
3261 }
3262
3263 Py_LOCAL_INLINE(Py_ssize_t)
_PyBytesWriter_GetSize(_PyBytesWriter * writer,char * str)3264 _PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
3265 {
3266 const char *start = _PyBytesWriter_AsString(writer);
3267 assert(str != NULL);
3268 assert(str >= start);
3269 assert(str - start <= writer->allocated);
3270 return str - start;
3271 }
3272
3273 #ifndef NDEBUG
3274 Py_LOCAL_INLINE(int)
_PyBytesWriter_CheckConsistency(_PyBytesWriter * writer,char * str)3275 _PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3276 {
3277 const char *start, *end;
3278
3279 if (writer->use_small_buffer) {
3280 assert(writer->buffer == NULL);
3281 }
3282 else {
3283 assert(writer->buffer != NULL);
3284 if (writer->use_bytearray)
3285 assert(PyByteArray_CheckExact(writer->buffer));
3286 else
3287 assert(PyBytes_CheckExact(writer->buffer));
3288 assert(Py_REFCNT(writer->buffer) == 1);
3289 }
3290
3291 if (writer->use_bytearray) {
3292 /* bytearray has its own overallocation algorithm,
3293 writer overallocation must be disabled */
3294 assert(!writer->overallocate);
3295 }
3296
3297 assert(0 <= writer->allocated);
3298 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
3299 /* the last byte must always be null */
3300 start = _PyBytesWriter_AsString(writer);
3301 assert(start[writer->allocated] == 0);
3302
3303 end = start + writer->allocated;
3304 assert(str != NULL);
3305 assert(start <= str && str <= end);
3306 return 1;
3307 }
3308 #endif
3309
3310 void*
_PyBytesWriter_Resize(_PyBytesWriter * writer,void * str,Py_ssize_t size)3311 _PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3312 {
3313 Py_ssize_t allocated, pos;
3314
3315 assert(_PyBytesWriter_CheckConsistency(writer, str));
3316 assert(writer->allocated < size);
3317
3318 allocated = size;
3319 if (writer->overallocate
3320 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3321 /* overallocate to limit the number of realloc() */
3322 allocated += allocated / OVERALLOCATE_FACTOR;
3323 }
3324
3325 pos = _PyBytesWriter_GetSize(writer, str);
3326 if (!writer->use_small_buffer) {
3327 if (writer->use_bytearray) {
3328 if (PyByteArray_Resize(writer->buffer, allocated))
3329 goto error;
3330 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3331 but we cannot use ob_alloc because bytes may need to be moved
3332 to use the whole buffer. bytearray uses an internal optimization
3333 to avoid moving or copying bytes when bytes are removed at the
3334 beginning (ex: del bytearray[:1]). */
3335 }
3336 else {
3337 if (_PyBytes_Resize(&writer->buffer, allocated))
3338 goto error;
3339 }
3340 }
3341 else {
3342 /* convert from stack buffer to bytes object buffer */
3343 assert(writer->buffer == NULL);
3344
3345 if (writer->use_bytearray)
3346 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3347 else
3348 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3349 if (writer->buffer == NULL)
3350 goto error;
3351
3352 if (pos != 0) {
3353 char *dest;
3354 if (writer->use_bytearray)
3355 dest = PyByteArray_AS_STRING(writer->buffer);
3356 else
3357 dest = PyBytes_AS_STRING(writer->buffer);
3358 memcpy(dest,
3359 writer->small_buffer,
3360 pos);
3361 }
3362
3363 writer->use_small_buffer = 0;
3364 #ifndef NDEBUG
3365 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3366 sizeof(writer->small_buffer));
3367 #endif
3368 }
3369 writer->allocated = allocated;
3370
3371 str = _PyBytesWriter_AsString(writer) + pos;
3372 assert(_PyBytesWriter_CheckConsistency(writer, str));
3373 return str;
3374
3375 error:
3376 _PyBytesWriter_Dealloc(writer);
3377 return NULL;
3378 }
3379
3380 void*
_PyBytesWriter_Prepare(_PyBytesWriter * writer,void * str,Py_ssize_t size)3381 _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3382 {
3383 Py_ssize_t new_min_size;
3384
3385 assert(_PyBytesWriter_CheckConsistency(writer, str));
3386 assert(size >= 0);
3387
3388 if (size == 0) {
3389 /* nothing to do */
3390 return str;
3391 }
3392
3393 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3394 PyErr_NoMemory();
3395 _PyBytesWriter_Dealloc(writer);
3396 return NULL;
3397 }
3398 new_min_size = writer->min_size + size;
3399
3400 if (new_min_size > writer->allocated)
3401 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3402
3403 writer->min_size = new_min_size;
3404 return str;
3405 }
3406
3407 /* Allocate the buffer to write size bytes.
3408 Return the pointer to the beginning of buffer data.
3409 Raise an exception and return NULL on error. */
3410 void*
_PyBytesWriter_Alloc(_PyBytesWriter * writer,Py_ssize_t size)3411 _PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3412 {
3413 /* ensure that _PyBytesWriter_Alloc() is only called once */
3414 assert(writer->min_size == 0 && writer->buffer == NULL);
3415 assert(size >= 0);
3416
3417 writer->use_small_buffer = 1;
3418 #ifndef NDEBUG
3419 writer->allocated = sizeof(writer->small_buffer) - 1;
3420 /* In debug mode, don't use the full small buffer because it is less
3421 efficient than bytes and bytearray objects to detect buffer underflow
3422 and buffer overflow. Use 10 bytes of the small buffer to test also
3423 code using the smaller buffer in debug mode.
3424
3425 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3426 in debug mode to also be able to detect stack overflow when running
3427 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3428 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3429 stack overflow. */
3430 writer->allocated = Py_MIN(writer->allocated, 10);
3431 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3432 to detect buffer overflow */
3433 writer->small_buffer[writer->allocated] = 0;
3434 #else
3435 writer->allocated = sizeof(writer->small_buffer);
3436 #endif
3437 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
3438 }
3439
3440 PyObject *
_PyBytesWriter_Finish(_PyBytesWriter * writer,void * str)3441 _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
3442 {
3443 Py_ssize_t size;
3444 PyObject *result;
3445
3446 assert(_PyBytesWriter_CheckConsistency(writer, str));
3447
3448 size = _PyBytesWriter_GetSize(writer, str);
3449 if (size == 0 && !writer->use_bytearray) {
3450 Py_CLEAR(writer->buffer);
3451 /* Get the empty byte string singleton */
3452 result = PyBytes_FromStringAndSize(NULL, 0);
3453 }
3454 else if (writer->use_small_buffer) {
3455 if (writer->use_bytearray) {
3456 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3457 }
3458 else {
3459 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3460 }
3461 }
3462 else {
3463 result = writer->buffer;
3464 writer->buffer = NULL;
3465
3466 if (size != writer->allocated) {
3467 if (writer->use_bytearray) {
3468 if (PyByteArray_Resize(result, size)) {
3469 Py_DECREF(result);
3470 return NULL;
3471 }
3472 }
3473 else {
3474 if (_PyBytes_Resize(&result, size)) {
3475 assert(result == NULL);
3476 return NULL;
3477 }
3478 }
3479 }
3480 }
3481 return result;
3482 }
3483
3484 void*
_PyBytesWriter_WriteBytes(_PyBytesWriter * writer,void * ptr,const void * bytes,Py_ssize_t size)3485 _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
3486 const void *bytes, Py_ssize_t size)
3487 {
3488 char *str = (char *)ptr;
3489
3490 str = _PyBytesWriter_Prepare(writer, str, size);
3491 if (str == NULL)
3492 return NULL;
3493
3494 memcpy(str, bytes, size);
3495 str += size;
3496
3497 return str;
3498 }
3499